diff options
author | Chris Lattner <sabre@nondot.org> | 2002-09-17 23:03:35 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2002-09-17 23:03:35 +0000 |
commit | 4f372f0f415ef6e04fbc305028d16bf6a89db5ae (patch) | |
tree | 3e6129ee8fba037730f024b714ba0807536a2a77 /utils | |
parent | 633a5b1aacb135957b20e5f11e779ea23ccb9619 (diff) | |
download | external_llvm-4f372f0f415ef6e04fbc305028d16bf6a89db5ae.zip external_llvm-4f372f0f415ef6e04fbc305028d16bf6a89db5ae.tar.gz external_llvm-4f372f0f415ef6e04fbc305028d16bf6a89db5ae.tar.bz2 |
Initial checkin of burg documentation files
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@3786 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils')
-rw-r--r-- | utils/Burg/Doc/Makefile | 84 | ||||
-rw-r--r-- | utils/Burg/Doc/doc.aux | 50 | ||||
-rw-r--r-- | utils/Burg/Doc/doc.dvi | bin | 0 -> 29856 bytes | |||
-rw-r--r-- | utils/Burg/Doc/doc.log | 157 | ||||
-rw-r--r-- | utils/Burg/Doc/doc.tex | 596 |
5 files changed, 887 insertions, 0 deletions
diff --git a/utils/Burg/Doc/Makefile b/utils/Burg/Doc/Makefile new file mode 100644 index 0000000..226210d --- /dev/null +++ b/utils/Burg/Doc/Makefile @@ -0,0 +1,84 @@ +# $Id$ + +#CFLAGS = +#CFLAGS = -O +#CFLAGS = -O -DNOLEX +CFLAGS = -g -DDEBUG +#CFLAGS = -g -DNOLEX -DDEBUG + +SRCS = \ + be.c \ + burs.c \ + closure.c \ + delta.c \ + fe.c \ + item.c \ + lex.c \ + list.c \ + main.c \ + map.c \ + nonterminal.c \ + operator.c \ + pattern.c \ + plank.c \ + queue.c \ + rule.c \ + string.c \ + symtab.c \ + table.c \ + trim.c \ + zalloc.c + +BU_OBJS = \ + burs.o \ + closure.o \ + delta.o \ + item.o \ + list.o \ + map.o \ + nonterminal.o \ + operator.o \ + pattern.o \ + queue.o \ + rule.o \ + table.o \ + trim.o \ + zalloc.o + +FE_OBJS = \ + be.o \ + fe.o \ + lex.o \ + main.o \ + plank.o \ + string.o \ + symtab.o \ + y.tab.o + +all: test + +burg: $(BU_OBJS) $(FE_OBJS) + $(CC) -o burg $(CFLAGS) $(BU_OBJS) $(FE_OBJS) + +y.tab.c y.tab.h: gram.y + yacc -d gram.y + +clean: + rm -f *.o y.tab.h y.tab.c core burg *.aux *.log *.dvi sample sample.c tmp + +$(FE_OBJS): b.h +$(BU_OBJS): b.h +$(FE_OBJS): fe.h + +lex.o: y.tab.h + +doc.dvi: doc.tex + latex doc; latex doc + +test: burg sample.gr + ./burg -I <sample.gr >sample.c && cc $(CFLAGS) -o sample sample.c && ./sample + ./burg -I sample.gr >tmp && cmp tmp sample.c + ./burg -I <sample.gr -o tmp && cmp tmp sample.c + ./burg -I sample.gr -o tmp && cmp tmp sample.c + ./burg -I -O0 <sample.gr >tmp && cmp tmp sample.c + ./burg -I -= <sample.gr >tmp && cmp tmp sample.c diff --git a/utils/Burg/Doc/doc.aux b/utils/Burg/Doc/doc.aux new file mode 100644 index 0000000..0f7c13f --- /dev/null +++ b/utils/Burg/Doc/doc.aux @@ -0,0 +1,50 @@ +\relax +\bibstyle{alpha} +\citation{aho-twig-toplas} +\citation{appel-87} +\citation{balachandran-complang} +\citation{kron-phd} +\citation{hoffmann-jacm} +\citation{hatcher-popl} +\citation{chase-popl} +\citation{pelegri-popl} +\citation{pelegri-phd} +\citation{wilhelm-tr} +\citation{henry-budp} 
+\citation{fraser-henry-spe-91} +\citation{proebsting-91} +\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Input}{1}} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces A Sample Tree Grammar}}{2}} +\newlabel{fig-tree-grammar}{{1}{2}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces EBNF Grammar for Tree Grammars for {\sc Burg}\ }}{3}} +\newlabel{fig-grammar-grammar}{{2}{3}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Output}{3}} +\citation{aho-johnson-dp-classic} +\citation{fraser-henry-spe-91} +\citation{henry-budp} +\citation{pelegri-phd} +\@writefile{toc}{\contentsline {section}{\numberline {4}Debugging}{6}} +\@writefile{toc}{\contentsline {section}{\numberline {5}Running {\sc Burg}\ }{6}} +\newlabel{sec-man-page}{{5}{6}} +\citation{pelegri-popl} +\citation{henry-budp} +\citation{balachandran-complang} +\citation{proebsting-91} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A Diverging Tree Grammar}}{7}} +\newlabel{fig-diverge-grammar}{{3}{7}} +\@writefile{toc}{\contentsline {section}{\numberline {6}Acknowledgements}{7}} +\bibcite{aho-twig-toplas}{AGT89} +\bibcite{aho-johnson-dp-classic}{AJ76} +\bibcite{appel-87}{App87} +\bibcite{balachandran-complang}{BDB90} +\bibcite{wilhelm-tr}{BMW87} +\bibcite{chase-popl}{Cha87} +\bibcite{fraser-henry-spe-91}{FH91} +\bibcite{hatcher-popl}{HC86} +\bibcite{henry-budp}{Hen89} +\bibcite{hoffmann-jacm}{HO82} +\bibcite{kron-phd}{Kro75} +\bibcite{pelegri-phd}{PL87} +\bibcite{pelegri-popl}{PLG88} +\bibcite{proebsting-91}{Pro91} diff --git a/utils/Burg/Doc/doc.dvi b/utils/Burg/Doc/doc.dvi Binary files differnew file mode 100644 index 0000000..3211f32 --- /dev/null +++ b/utils/Burg/Doc/doc.dvi diff --git a/utils/Burg/Doc/doc.log b/utils/Burg/Doc/doc.log new file mode 100644 index 0000000..a224a4e --- /dev/null +++ b/utils/Burg/Doc/doc.log @@ -0,0 +1,157 @@ +This is TeX, Version 
3.14159 (Web2C 7.3.2) (format=latex 2000.8.30) 4 JUN 2001 13:20 +**doc +(doc.tex +LaTeX2e <2000/06/01> +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latex209.def +File: latex209.def 1998/05/13 v0.52 Standard LaTeX file + + + Entering LaTeX 2.09 COMPATIBILITY MODE + ************************************************************* + !!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!! + + This mode attempts to provide an emulation of the LaTeX 2.09 + author environment so that OLD documents can be successfully + processed. It should NOT be used for NEW documents! + + New documents should use Standard LaTeX conventions and start + with the \documentclass command. + + Compatibility mode is UNLIKELY TO WORK with LaTeX 2.09 style + files that change any internal macros, especially not with + those that change the FONT SELECTION or OUTPUT ROUTINES. + + Therefore such style files MUST BE UPDATED to use + Current Standard LaTeX: LaTeX2e. + If you suspect that you may be using such a style file, which + is probably very, very old by now, then you should attempt to + get it updated by sending a copy of this error message to the + author of that file. + ************************************************************* + +\footheight=\dimen102 +\@maxsep=\dimen103 +\@dblmaxsep=\dimen104 +\@cla=\count79 +\@clb=\count80 +\mscount=\count81 +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/tracefnt.sty +Package: tracefnt 1997/05/29 v3.0j Standard LaTeX package (font tracing) +\tracingfonts=\count82 +LaTeX Info: Redefining \selectfont on input line 96. +) +\symbold=\mathgroup4 +\symsans=\mathgroup5 +\symtypewriter=\mathgroup6 +\symitalic=\mathgroup7 +\symsmallcaps=\mathgroup8 +\symslanted=\mathgroup9 +LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 288. +LaTeX Font Info: Redeclaring math alphabet \mathsf on input line 289. +LaTeX Font Info: Redeclaring math alphabet \mathtt on input line 290. 
+LaTeX Font Info: Redeclaring math alphabet \mathit on input line 296. +LaTeX Info: Redefining \em on input line 306. +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latexsym.sty +Package: latexsym 1998/08/17 v2.2e Standard LaTeX package (lasy symbols) +\symlasy=\mathgroup10 +LaTeX Font Info: Overwriting symbol font `lasy' in version `bold' +(Font) U/lasy/m/n --> U/lasy/b/n on input line 42. +) +LaTeX Font Info: Redeclaring math delimiter \lgroup on input line 370. +LaTeX Font Info: Redeclaring math delimiter \rgroup on input line 372. +LaTeX Font Info: Redeclaring math delimiter \bracevert on input line 374. + +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/config/latex209.cf +g +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/rawfonts.sty +Compatibility mode: package `' requested, but `rawfonts' provided. +Package: rawfonts 1994/05/08 Low-level LaTeX 2.09 font compatibility + +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/somedefs.sty +Package: somedefs 1994/06/01 Toolkit for optional definitions +) +LaTeX Font Info: Try loading font information for U+lasy on input line 44. + (/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/ulasy.fd +File: ulasy.fd 1998/08/17 v2.2eLaTeX symbol font definitions +)))) (/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/article. +cls +Document Class: article 2000/05/19 v1.4b Standard LaTeX document class +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/size11.clo +File: size11.clo 2000/05/19 v1.4b Standard LaTeX file (size option) +) +\c@part=\count83 +\c@section=\count84 +\c@subsection=\count85 +\c@subsubsection=\count86 +\c@paragraph=\count87 +\c@subparagraph=\count88 +\c@figure=\count89 +\c@table=\count90 +\abovecaptionskip=\skip41 +\belowcaptionskip=\skip42 +Compatibility mode: definition of \rm ignored. +Compatibility mode: definition of \sf ignored. +Compatibility mode: definition of \tt ignored. 
+Compatibility mode: definition of \bf ignored. +Compatibility mode: definition of \it ignored. +Compatibility mode: definition of \sl ignored. +Compatibility mode: definition of \sc ignored. +LaTeX Info: Redefining \cal on input line 501. +LaTeX Info: Redefining \mit on input line 502. +\bibindent=\dimen105 +) +(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/pstex/fullpage.sty +) (doc.aux) +\openout1 = `doc.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 2. +LaTeX Font Info: ... okay on input line 2. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <12> on input line 33. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <8> on input line 33. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <6> on input line 33. +LaTeX Font Info: Try loading font information for OMS+cmtt on input line 100 +. +LaTeX Font Info: No file OMScmtt.fd. on input line 100. +LaTeX Font Warning: Font shape `OMS/cmtt/m/n' undefined +(Font) using `OMS/cmsy/m/n' instead +(Font) for symbol `textbraceleft' on input line 100. + [1 + +] +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <10.95> on input line 150. 
+ [2] [3] [4] [5] [6] +Overfull \hbox (1.38191pt too wide) in paragraph at lines 480--484 +[]\OT1/cmr/m/n/10.95 Emit code for \OT1/cmtt/m/n/10.95 burm[]arity\OT1/cmr/m/n/ +10.95 , \OT1/cmtt/m/n/10.95 burm[]child\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95 + burm[]cost\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95 burm[]ntname\OT1/cmr/m/n/10 +.95 , \OT1/cmtt/m/n/10.95 burm[]op[]label\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10. +95 burm[]opname\OT1/cmr/m/n/10.95 , + [] + +[7] [8] [9] (doc.aux) +LaTeX Font Warning: Some font shapes were not available, defaults substituted. + ) +Here is how much of TeX's memory you used: + 543 strings out of 12968 + 6147 string characters out of 289029 + 446019 words of memory out of 1453895 + 3433 multiletter control sequences out of 10000+10000 + 23403 words of font info for 87 fonts, out of 400000 for 2000 + 14 hyphenation exceptions out of 1000 + 21i,6n,20p,308b,283s stack positions out of 300i,100n,500p,50000b,4000s + +Output written on doc.dvi (9 pages, 29856 bytes). diff --git a/utils/Burg/Doc/doc.tex b/utils/Burg/Doc/doc.tex new file mode 100644 index 0000000..3dc67be --- /dev/null +++ b/utils/Burg/Doc/doc.tex @@ -0,0 +1,596 @@ +\documentstyle[11pt,fullpage]{article} +\begin{document} + +\def\AddSpace#1{\ifcat#1a\ \fi#1} % if next is a letter, add a space +\def\YACC#1{{\sc Yacc}\AddSpace#1} +\def\TWIG#1{{\sc Twig}\AddSpace#1} +\def\PROG#1{{\sc Burg}\AddSpace#1} +\def\PARSER#1{{\sc Burm}\AddSpace#1} +\def\CODEGEN#1{{\sc Codegen}\AddSpace#1} + +\title{{\sc Burg} --- Fast Optimal Instruction Selection and Tree Parsing} +\author{ +Christopher W. Fraser \\ +AT\&T Bell Laboratories \\ +600 Mountain Avenue 2C-464 \\ +Murray Hill, NJ 07974-0636 \\ +{\tt cwf@research.att.com} +\and +Robert R. Henry \\ +Tera Computer Company \\ +400 N. 34th St., Suite 300 \\ +Seattle, WA 98103-8600 \\ +{\tt rrh@tera.com} +\and +Todd A. Proebsting \\ +Dept. 
of Computer Sciences \\ +University of Wisconsin \\ +Madison, WI 53706 \\ +{\tt todd@cs.wisc.edu} +} +\date{December 1991} + +\maketitle +\bibliographystyle{alpha} +\newcommand\term[1]{{\it #1}} +\newcommand\secref[1]{\S\ref{#1}} +\newcommand\figref[1]{Figure~\ref{#1}} +% +% rationale table making +% +{\catcode`\^^M=13 \gdef\Obeycr{\catcode`\^^M=13 \def^^M{\\}}% +\gdef\Restorecr{\catcode`\^^M=5 }} % + +% +% for printing out options +% +\newcommand\option[1]{% #1=option character +{\tt -#1}% +} +\newcommand\var[1]{% +{\tt #1}% +} +\section{Overview} + +\PROG is a program that generates a fast tree parser using BURS +(Bottom-Up Rewrite System) technology. It accepts a cost-augmented +tree grammar and emits a C program that discovers in linear time an +optimal parse of trees in the language described by the grammar. \PROG +has been used to construct fast optimal instruction selectors for use +in code generation. \PROG addresses many of the problems addressed by +{\sc Twig}~\cite{aho-twig-toplas,appel-87}, but it is somewhat less flexible and +much faster. \PROG is available via anonymous \var{ftp} from +\var{kaese.cs.wisc.edu}. The compressed \var{shar} file +\var{pub/burg.shar.Z} holds the complete distribution. + +This document describes only that fraction of the BURS model that is +required to use \PROG. Readers interested in more detail might start +with Reference~\cite{balachandran-complang}. Other relevant documents +include References~\cite{kron-phd,hoffmann-jacm,hatcher-popl,chase-popl,pelegri-popl,pelegri-phd,wilhelm-tr,henry-budp,fraser-henry-spe-91,proebsting-91}. + +\section{Input} + +\PROG accepts a tree grammar and emits a BURS tree parser. +\figref{fig-tree-grammar} shows a sample grammar that implements a very +simple instruction selector. 
+\begin{figure} +\begin{verbatim} +%{ +#define NODEPTR_TYPE treepointer +#define OP_LABEL(p) ((p)->op) +#define LEFT_CHILD(p) ((p)->left) +#define RIGHT_CHILD(p) ((p)->right) +#define STATE_LABEL(p) ((p)->state_label) +#define PANIC printf +%} +%start reg +%term Assign=1 Constant=2 Fetch=3 Four=4 Mul=5 Plus=6 +%% +con: Constant = 1 (0); +con: Four = 2 (0); +addr: con = 3 (0); +addr: Plus(con,reg) = 4 (0); +addr: Plus(con,Mul(Four,reg)) = 5 (0); +reg: Fetch(addr) = 6 (1); +reg: Assign(addr,reg) = 7 (1); +\end{verbatim} +\caption{A Sample Tree Grammar\label{fig-tree-grammar}} +\end{figure} +\PROG grammars are structurally similar to \YACC's. Comments follow C +conventions. Text between ``\var{\%\{}'' and ``\var{\%\}}'' is called +the \term{configuration section}; there may be several such segments. +All are concatenated and copied verbatim into the head of the generated +parser, which is called \PARSER. Text after the second ``\var{\%\%}'', +if any, is also copied verbatim into \PARSER, at the end. + +The configuration section configures \PARSER for the trees being parsed +and the client's environment. This section must define +\var{NODEPTR\_TYPE} to be a visible typedef symbol for a pointer to a +node in the subject tree. \PARSER invokes \var{OP\_LABEL(p)}, +\var{LEFT\_CHILD(p)}, and \var{RIGHT\_CHILD(p)} to read the operator +and children from the node pointed to by \var{p}. It invokes +\var{PANIC} when it detects an error. If the configuration section +defines these operations as macros, they are implemented in-line; +otherwise, they must be implemented as functions. The section on +diagnostics elaborates on \var{PANIC}. + +\PARSER computes and stores a single integral \term{state} in each node +of the subject tree. The configuration section must define a macro +\var{STATE\_LABEL(p)} to access the state field of the node pointed to +by \var{p}. A macro is required because \PROG uses it as an lvalue. 
A +C \var{short} is usually the right choice; typical code generation +grammars require 100--1000 distinct state labels. + +The tree grammar follows the configuration section. +\figref{fig-grammar-grammar} gives an EBNF grammar for \PROG tree +grammars. +\begin{figure} +\begin{verbatim} +grammar: {dcl} '%%' {rule} + +dcl: '%start' Nonterminal +dcl: '%term' { Identifier '=' Integer } + +rule: Nonterminal ':' tree '=' Integer cost ';' +cost: /* empty */ +cost: '(' Integer { ',' Integer } ')' + +tree: Term '(' tree ',' tree ')' +tree: Term '(' tree ')' +tree: Term +tree: Nonterminal +\end{verbatim} +\caption{EBNF Grammar for Tree Grammars for \PROG\ \label{fig-grammar-grammar}} +\end{figure} +Comments, the text between ``\var{\%\{}'' and ``\var{\%\}}'', and the +text after the optional second ``\var{\%\%}'' are treated lexically, so +the figure omits them. In the EBNF grammar, quoted text must appear +literally, \var{Nonterminal} and \var{Integer} are self-explanatory, +and \var{Term} denotes an identifier previously declared as a +terminal. {\tt\{$X$\}} denotes zero or more instances of $X$. + +Text before the first ``\var{\%\%}'' declares the start symbol and the +terminals or operators in subject trees. All terminals must be +declared; each line of such declarations begins with \var{\%term}. +Each terminal has fixed arity, which \PROG infers from the rules using that terminal. +\PROG restricts terminals to have at most two children. Each terminal +is declared with a positive, unique, integral \term{external symbol +number} after a ``\var{=}''. \var{OP\_LABEL(p)} must return the valid +external symbol number for \var{p}. Ideally, external symbol numbers +form a dense enumeration. Non-terminals are not declared, but the +start symbol may be declared with a line that begins with +\var{\%start}. + +Text after the first ``\var{\%\%}'' declares the rules. 
A tree grammar +is like a context-free grammar: it has rules, non-terminals, +terminals, and a special start non-terminal. The right-hand side of a +rule, called the \term{pattern}, is a tree. Tree patterns appear in +prefix parenthesized form. Every non-terminal denotes a tree. A chain +rule is a rule whose pattern is another non-terminal. If no start +symbol is declared, \PROG uses the non-terminal defined by the first +rule. \PROG needs a single start symbol; grammars for which it is +natural to use multiple start symbols must be augmented with an +artificial start symbol that derives, with zero cost, the grammar's +natural start symbols. \PARSER will automatically select one +that costs least for any given tree. + +\PROG accepts no embedded semantic actions like \YACC's, because no one +format suited all intended applications. Instead, each rule has a +positive, unique, integral \term{external rule number}, after the +pattern and preceded by a ``\var{=}''. Ideally, external rule numbers +form a dense enumeration. \PARSER uses these numbers to report the +matching rule to a user-supplied routine, which must implement any +desired semantic action; see below. Humans may select these integers +by hand, but \PROG is intended as a \term{server} for building BURS +tree parsers. Thus some \PROG clients will consume a richer +description and translate it into \PROG's simpler input. + +Rules end with a vector of non-negative, integer costs, in parentheses +and separated by commas. If the cost vector is omitted, then all +elements are assumed to be zero. \PROG retains only the first four +elements of the list. The cost of a derivation is the sum of the costs +for all rules applied in the derivation. Arithmetic on cost vectors +treats each member of the vector independently. The tree parser finds +the cheapest parse of the subject tree. It breaks ties arbitrarily. 
+By default, \PROG uses only the \term{principal cost} of each cost +vector, which defaults to the first element, but options described +below provide alternatives. + +\section{Output} + +\PARSER traverses the subject tree twice. The first pass or +\term{labeller} runs bottom-up and left-to-right, visiting each node +exactly once. Each node is labelled with a state, a single number that +encodes all full and partial optimal pattern matches viable at that +node. The second pass or \term{reducer} traverses the subject tree +top-down. The reducer accepts a tree node's state label and a +\term{goal} non-terminal --- initially the root's state label and the +start symbol --- which combine to determine the rule to be applied at +that node. By construction, the rule has the given goal non-terminal +as its left-hand side. The rule's pattern identifies the subject +subtrees and goal non-terminals for all recursive visits. Here, a +``subtree'' is not necessarily an immediate child of the current node. +Patterns with interior operators cause the reducer to skip the +corresponding subject nodes, so the reducer may proceed directly to +grandchildren, great-grandchildren, and so on. On the other hand, +chain rules cause the reducer to revisit the current subject node, with +a new goal +non-terminal, so \term{x} is also regarded as a subtree of \term{x}. + +As the reducer visits (and possibly revisits) each node, user-supplied +code implements semantic action side effects and controls the order in +which subtrees are visited. The labeller is self-contained, but the +reducer combines code from \PROG with code from the user, so \PARSER +does not stand alone. + +The \PARSER that is generated by \PROG provides primitives for +labelling and reducing trees. These mechanisms are a compromise +between expressibility, abstraction, simplicity, flexibility and +efficiency. 
Clients may combine primitives into labellers and reducers +that can traverse trees in arbitrary ways, and they may call semantic +routines when and how they wish during traversal. Also, \PROG +generates a few higher level routines that implement common +combinations of primitives, and it generates mechanisms that help debug +the tree parse. + +\PROG generates the labeller as a function named \var{burm\_label} with +the signature +\begin{verbatim} +extern int burm_label(NODEPTR_TYPE p); +\end{verbatim} +It labels the entire subject tree pointed to by \var{p} and returns the +root's state label. State zero labels unmatched trees. The trees may +be corrupt or merely inconsistent with the grammar. + +The simpler \var{burm\_state} is \var{burm\_label} without the +code to traverse the tree and to read and write its fields. It may be +used to integrate labelling into user-supplied traversal code. A +typical signature is +\begin{verbatim} +extern int burm_state(int op, int leftstate, int rightstate); +\end{verbatim} +It accepts an external symbol number for a node and the labels for the +node's left and right children. It returns the state label to assign +to that node. For unary operators, the last argument is ignored; for +leaves, the last two arguments are ignored. In general, \PROG +generates a \var{burm\_state} that accepts the maximum number of child +states required by the input grammar. For example, if the grammar +includes no binary operators, then \var{burm\_state} will have the +signature +\begin{verbatim} +extern int burm_state(int op, int leftstate); +\end{verbatim} +This feature is included to permit future expansion to operators with +more than two children. + +The user must write the reducer, but \PARSER writes code and data that +help. Primary is +\begin{verbatim} +extern int burm_rule(int state, int goalnt); +\end{verbatim} +which accepts a tree's state label and a goal non-terminal and returns the +external rule number of a rule. 
The rule will have matched the tree +and have the goal non-terminal on the left-hand side; \var{burm\_rule} +returns zero when the tree labelled with the given state did not match +the goal non-terminal. For the initial, root-level call, \var{goalnt} +must be one, and \PARSER exports an array that identifies the values +for nested calls: +\begin{verbatim} +extern short *burm_nts[] = { ... }; +\end{verbatim} +is an array indexed by external rule numbers. Each element points to a +zero-terminated vector of short integers, which encode the goal +non-terminals for that rule's pattern, left-to-right. The user needs +only these two externals to write a complete reducer, but a third +external simplifies some applications: +\begin{verbatim} +extern NODEPTR_TYPE *burm_kids(NODEPTR_TYPE p, int eruleno, NODEPTR_TYPE kids[]); +\end{verbatim} +accepts the address of a tree \var{p}, an external rule number, and an +empty vector of pointers to trees. The procedure assumes that \var{p} +matched the given rule, and it fills in the vector with the subtrees (in +the sense described above) of \var{p} that must be reduced recursively. +\var{kids} is returned. It is not zero-terminated. + +The simple user code below labels and then fully reduces a subject tree; +the reducer prints the tree cover. \var{burm\_string} is defined below. +\begin{verbatim} +parse(NODEPTR_TYPE p) { + burm_label(p); /* label the tree */ + reduce(p, 1, 0); /* and reduce it */ +} + +reduce(NODEPTR_TYPE p, int goalnt, int indent) { + int eruleno = burm_rule(STATE_LABEL(p), goalnt); /* matching rule number */ + short *nts = burm_nts[eruleno]; /* subtree goal non-terminals */ + NODEPTR_TYPE kids[10]; /* subtree pointers */ + int i; + + for (i = 0; i < indent; i++) + printf("."); /* print indented ... */ + printf("%s\n", burm_string[eruleno]); /* ... 
text of rule */ + burm_kids(p, eruleno, kids); /* initialize subtree pointers */ + for (i = 0; nts[i]; i++) /* traverse subtrees left-to-right */ + reduce(kids[i], nts[i], indent+1); /* and print them recursively */ +} +\end{verbatim} +The reducer may recursively traverse subtrees in any order, and it may +interleave arbitrary semantic actions with recursive traversals. +Multiple reducers may be written, to implement multi-pass algorithms +or independent single-pass algorithms. + +For each non-terminal $x$, \PROG emits a preprocessor directive to +equate \var{burm\_}$x$\var{\_NT} with $x$'s integral encoding. It also +defines a macro \var{burm\_}$x$\var{\_rule(a)} that is equivalent to +\var{burm\_rule(a,}$x$\var{)}. For the grammar in +\figref{fig-tree-grammar}, \PROG emits +\begin{verbatim} +#define burm_reg_NT 1 +#define burm_con_NT 2 +#define burm_addr_NT 3 +#define burm_reg_rule(a) ... +#define burm_con_rule(a) ... +#define burm_addr_rule(a) ... +\end{verbatim} +Such symbols are visible only to the code after the second +``\var{\%\%}''. If the symbols \var{burm\_}$x$\var{\_NT} are needed +elsewhere, extract them from the \PARSER source. + +The \option{I} option directs \PROG to emit an encoding of the input +that may help the user produce diagnostics. The vectors +\begin{verbatim} +extern char *burm_opname[]; +extern char burm_arity[]; +\end{verbatim} +hold the name and number of children, respectively, for each terminal. +They are indexed by the terminal's external symbol number. The vectors +\begin{verbatim} +extern char *burm_string[]; +extern short burm_cost[][4]; +\end{verbatim} +hold the text and cost vector for each rule. They are indexed by the +external rule number. The zero-terminated vector +\begin{verbatim} +extern char *burm_ntname[]; +\end{verbatim} +is indexed by \var{burm\_}$x$\var{\_NT} and holds the name of +non-terminal $x$. 
Finally, the procedures +\begin{verbatim} +extern int burm_op_label(NODEPTR_TYPE p); +extern int burm_state_label(NODEPTR_TYPE p); +extern NODEPTR_TYPE burm_child(NODEPTR_TYPE p, int index); +\end{verbatim} +are callable versions of the configuration macros. +\var{burm\_child(p,0)} implements \var{LEFT\_CHILD(p)}, and +\var{burm\_child(p,1)} implements \var{RIGHT\_CHILD(p)}. A sample use +is the grammar-independent expression +\var{burm\_opname[burm\_op\_label(p)]}, which yields the textual name +for the operator in the tree node pointed to by \var{p}. + +A complete tree parser can be assembled from just \var{burm\_state}, +\var{burm\_rule}, and \var{burm\_nts}, which use none of the +configuration section except \var{PANIC}. The generated routines that +use the rest of the configuration section are compiled only if the +configuration section defines \var{STATE\_LABEL}, so they can be +omitted if the user prefers to hide the tree structure from \PARSER. +This course may be wise if, say, the tree structure is defined in a +large header file with symbols that might collide with \PARSER's. + +\PARSER selects an optimal parse without dynamic programming at compile +time~\cite{aho-johnson-dp-classic}. Instead, \PROG does the dynamic +programming at compile-compile time, as it builds \PARSER. +Consequently, \PARSER parses quickly. Similar labellers have taken as +few as 15 instructions per node, and reducers as few as 35 per node +visited~\cite{fraser-henry-spe-91}. + +\section{Debugging} + +\PARSER invokes \var{PANIC} when an error prevents it from proceeding. +\var{PANIC} has the same signature as \var{printf}. It should pass its +arguments to \var{printf} if diagnostics are desired and then either +abort (say via \var{exit}) or recover (say via \var{longjmp}). If it +returns, \PARSER aborts. Some errors are not caught. + +\PROG assumes a robust preprocessor, so it omits full consistency +checking and error recovery. 
\PROG constructs a set of states using a +closure algorithm like that used in LR table construction. \PROG +considers all possible trees generated by the tree grammar and +summarizes infinite sets of trees with finite sets. The summary +records the cost of those trees but actually manipulates the +differences in costs between viable alternatives using a dynamic +programming algorithm. Reference~\cite{henry-budp} elaborates. + +Some grammars derive trees whose optimal parses depend on arbitrarily +distant data. When this happens, \PROG and the tree grammar +\term{cost diverge}, and \PROG attempts to build an infinite +set of states; it first thrashes and ultimately exhausts +memory and exits. For example, the tree grammar in +\figref{fig-diverge-grammar} +\begin{figure} +\begin{verbatim} +%term Const=17 RedFetch=20 GreenFetch=21 Plus=22 +%% +reg: GreenFetch(green_reg) = 10 (0); +reg: RedFetch(red_reg) = 11 (0); + +green_reg: Const = 20 (0); +green_reg: Plus(green_reg,green_reg) = 21 (1); + +red_reg: Const = 30 (0); +red_reg: Plus(red_reg,red_reg) = 31 (2); +\end{verbatim} +\caption{A Diverging Tree Grammar\label{fig-diverge-grammar}} +\end{figure} +diverges, since non-terminals \var{green\_reg} and \var{red\_reg} +derive identical infinite trees with different costs. If the cost of +rule 31 is changed to 1, then the grammar does not diverge. + +Practical tree grammars describing instruction selection do not +cost-diverge because infinite trees are derived from non-terminals +that model temporary registers. Machines can move data between +different types of registers for a small bounded cost, and the rules +for these instructions prevent divergence. For example, if +\figref{fig-diverge-grammar} included rules to move data between red +and green registers, the grammar would not diverge. If a bona fide +machine grammar appears to make \PROG loop, try a host with more +memory. 
To apply \PROG to problems other than instruction selection, +be prepared to consult the literature on +cost-divergence~\cite{pelegri-phd}. + +\section{Running \PROG\ }\label{sec-man-page} + +\PROG reads a tree grammar and writes a \PARSER in C. \PARSER can be +compiled by itself or included in another file. When suitably named +with the \option{p} option, disjoint instances of \PARSER should link +together without name conflicts. The command: +\begin{flushleft} +\var{burg} [ {\it arguments} ] [ {\it file} ] +\end{flushleft} +invokes \PROG. If a {\it file} is named, \PROG expects its grammar +there; otherwise it reads the standard input. The options include: +\def\Empty{} +% +\newcommand\odescr[2]{% #1=option character, #2=optional argument +\gdef\Arg2{#2}% +\item[\option{#1}\ifx\Arg2\Empty\else{{\it #2}}\fi] +} +\begin{description} +% +\odescr{c}{} $N$ +Abort if any relative cost exceeds $N$, which keeps \PROG from looping on +diverging grammars. Several +references~\cite{pelegri-popl,henry-budp,balachandran-complang,proebsting-91} +explain relative costs. +% +\odescr{d}{} +Report a few statistics and flag unused rules and terminals. +% +\odescr{o}{} {\it file} +Write parser into {\it file}. Otherwise it writes to the standard output. +% +\odescr{p}{} {\it prefix} +Start exported names with {\it prefix}. The default is \var{burm}. +% +\odescr{t}{} +Generates smaller tables faster, but all goal non-terminals passed to +\var{burm\_rule} must come from an appropriate \var{burm\_nts}. Using +\var{burm\_}$x$\var{\_NT} instead may give unpredictable results. +% +\odescr{I}{} +Emit code for \var{burm\_arity}, \var{burm\_child}, \var{burm\_cost}, +\var{burm\_ntname}, \var{burm\_op\_label}, \var{burm\_opname}, +\var{burm\_state\_label}, and \var{burm\_string}. +% +\odescr{O}{} $N$ +Change the principal cost to $N$. Elements of each cost vector are +numbered from zero. +% +\odescr{=}{} +Compare costs lexicographically, using all costs in the given order. 
+This option slows \PROG and may produce a larger parser. Increases +range from small to astronomical. +\end{description} + +\section{Acknowledgements} + +The first \PROG was adapted by the second author from his \CODEGEN +package, which was developed at the University of Washington with +partial support from NSF Grant CCR-88-01806. It was unbundled from +\CODEGEN with the support of Tera Computer. The current \PROG was +written by the third author with the support of NSF grant +CCR-8908355. The interface, documentation, and testing involved +all three authors. + +Comments from a large group at the 1991 Dagstuhl Seminar on Code +Generation improved \PROG's interface. Robert Giegerich and Susan +Graham organized the workshop, and the International Conference and +Research Center for Computer Science, Schloss Dagstuhl, provided an +ideal environment for such collaboration. Beta-testers included Helmut +Emmelmann, Dave Hanson, John Hauser, Hugh Redelmeier, and Bill Waite. + +\begin{thebibliography}{BMW87} + +\bibitem[AGT89]{aho-twig-toplas} +Alfred~V. Aho, Mahadevan Ganapathi, and Steven W.~K. Tjiang. +\newblock Code generation using tree matching and dynamic programming. +\newblock {\em ACM Transactions on Programming Languages and Systems}, + 11(4):491--516, October 1989. + +\bibitem[AJ76]{aho-johnson-dp-classic} +Alfred~V. Aho and Steven~C. Johnson. +\newblock Optimal code generation for expression trees. +\newblock {\em Journal of the ACM}, 23(3):458--501, July 1976. + +\bibitem[App87]{appel-87} +Andrew~W. Appel. +\newblock Concise specification of locally optimal code generators. +\newblock Technical report CS-TR-080-87, Princeton University, 1987. + +\bibitem[BDB90]{balachandran-complang} +A.~Balachandran, D.~M. Dhamdhere, and S.~Biswas. +\newblock Efficient retargetable code generation using bottom-up tree pattern + matching. +\newblock {\em Computer Languages}, 15(3):127--140, 1990. 
+ +\bibitem[BMW87]{wilhelm-tr} +J\"{u}rgen B\"{o}rstler, Ulrich M\"{o}nche, and Reinhard Wilhelm. +\newblock Table compression for tree automata. +\newblock Technical Report Aachener Informatik-Berichte No. 87-12, RWTH Aachen, + Fachgruppe Informatik, Aachen, Fed. Rep. of Germany, 1987. + +\bibitem[Cha87]{chase-popl} +David~R. Chase. +\newblock An improvement to bottom up tree pattern matching. +\newblock {\em Fourteenth Annual ACM Symposium on Principles of Programming + Languages}, pages 168--177, January 1987. + +\bibitem[FH91]{fraser-henry-spe-91} +Christopher~W. Fraser and Robert~R. Henry. +\newblock Hard-coding bottom-up code generation tables to save time and space. +\newblock {\em Software---Practice \& Experience}, 21(1):1--12, January 1991. + +\bibitem[HC86]{hatcher-popl} +Philip~J. Hatcher and Thomas~W. Christopher. +\newblock High-quality code generation via bottom-up tree pattern matching. +\newblock {\em Thirteenth Annual ACM Symposium on Principles of Programming + Languages}, pages 119--130, January 1986. + +\bibitem[Hen89]{henry-budp} +Robert~R. Henry. +\newblock Encoding optimal pattern selection in a table-driven bottom-up + tree-pattern matcher. +\newblock Technical Report 89-02-04, University of Washington Computer Science + Department, Seattle, WA, February 1989. + +\bibitem[HO82]{hoffmann-jacm} +Christoph Hoffmann and Michael~J. O'Donnell. +\newblock Pattern matching in trees. +\newblock {\em Journal of the ACM}, 29(1):68--95, January 1982. + +\bibitem[Kro75]{kron-phd} +H.~H. Kron. +\newblock {\em Tree Templates and Subtree Transformational Grammars}. +\newblock PhD thesis, UC Santa Cruz, December 1975. + +\bibitem[PL87]{pelegri-phd} +Eduardo Pelegri-Llopart. +\newblock {\em Tree Transformations in Compiler Systems}. +\newblock PhD thesis, UC Berkeley, December 1987. + +\bibitem[PLG88]{pelegri-popl} +Eduardo Pelegri-Llopart and Susan~L. Graham. +\newblock Optimal code generation for expression trees: An application of + {BURS} theory. 
+\newblock {\em Fifteenth Annual ACM Symposium on Principles of Programming + Languages}, pages 294--308, January 1988. + +\bibitem[Pro91]{proebsting-91} +Todd~A. Proebsting. +\newblock Simple and efficient {BURS} table generation. +\newblock Technical report, Department of Computer Sciences, University of + Wisconsin, 1991. + +\end{thebibliography} + +\end{document} + |