Initial check in docu

2026-01-03 18:31:15 +01:00
parent e2c3cbc520
commit ee130973e2
98 changed files with 9430 additions and 0 deletions
@@ -0,0 +1,625 @@
+\chapter{Grammar of Delphi Parser Generator}
+
+\clearpage \section{Lexical analyzer}
+\begin{verbatim}
+unit dpgDpgLexer;
+
+lexer TdpgDpgLexer;
+options
+{
+   testLiterals = false;
+   k            = 2;
+}
+
+tokens
+{
+   "unit";
+   "uses";
+   "const";
+   "type";
+
+   "lexer";
+   "parser";
+
+   "options";
+   "tokens";
+   "memberdecl";
+   "memberdef";
+
+   "private";
+   "protected";
+   "public";
+
+   "returns";
+   "local";
+
+   "except";
+   "finally";
+
+   SEMPRED;
+
+   USES;
+   OPTIONS;
+   TOKENS;
+}
+
+// --------------------------------------------------------
+// Simple tokens
+// --------------------------------------------------------
+LPAREN:     '(';
+RPAREN:     ')';
+RCURLY:     '}';
+COLON:      ':';
+SEMI:       ';';
+COMMA:      ',';
+ASSIGN:     '=';
+IMPLIES:    "=>";
+QUEST:      '?';
+PLUS:       '+';
+STAR:       '*';
+NOT:        '~';
+OR:         '|';
+BANG:       '!';
+WILDCARD:   '.';
+RANGE:      "..";
+
+// --------------------------------------------------------
+// Character literal
+// --------------------------------------------------------
+CHARLIT
+   :  '\''! (ESC | ~'\'') '\''! ;
+
+// --------------------------------------------------------
+// String literal
+// --------------------------------------------------------
+STRINGLIT
+   :  '"' (ESC | ~'"')* '"' ;
+
+// --------------------------------------------------------
+// Integer
+// --------------------------------------------------------
+INTEGER local
+{
+   i: integer;
+   v: integer;
+}
+   :  DNUMBER
+      {
+         v := 0;
+         for i:=1 to Length( TokenText) do
+         begin
+            v := v * 10 + ord( TokenText[i]) - ord('0');
+         end;
+
+         TokenText := IntToStr( v);
+      }
+   ;
+
+
+
+
+// --------------------------------------------------------
+// Argument action
+// --------------------------------------------------------
+ARGACTION
+   :
+      '['!
+      (
+           '\r' '\n'   {  newLine; }
+         | '\r'        {  newLine; }
+         | '\n'        {  newLine; }
+         | ~']'
+      )*
+      ']'!
+   ;
+
+// --------------------------------------------------------
+// Action
+// --------------------------------------------------------
+ACTION
+   :
+      '{'
+      (
+           '\r' '\n' {  newLine; }
+         | '\r'      {  newLine; }
+         | '\n'      {  newLine; }
+         | ~'}'
+      )*
+      '}'
+      ( '?'! { _ttype := TT_SEMPRED; } )?
+   ;
+
+// --------------------------------------------------------
+// Token ref
+// --------------------------------------------------------
+TOKENREF
+options
+{
+   testLiterals = true;
+}
+   :  'A'..'Z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* ;
+
+// --------------------------------------------------------
+// Rule ref
+// --------------------------------------------------------
+RULEREF
+local
+{
+   t: integer;
+}
+   :
+      t = INT_RULEREF { _ttype := t; }
+      (
+         {t = LT_uses}?     WS_LOOP ('{'  { _ttype := TT_USES;    } )?
+      |  {t = LT_options}?  WS_LOOP ('{'  { _ttype := TT_OPTIONS; } )?
+      |  {t = LT_tokens}?   WS_LOOP ('{'  { _ttype := TT_TOKENS;  } )?
+      )?
+   ;
+
+// --------------------------------------------------------
+// Internal rule ref
+// --------------------------------------------------------
+protected INT_RULEREF returns [integer]
+{
+   _ttype := TT_RULEREF;
+}
+   :  'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
+      {
+         result := TestLiteral( _ttype);
+      }
+   ;
+
+// --------------------------------------------------------
+// COMMENT
+// --------------------------------------------------------
+COMMENT
+   :  SLCOMMENT { _ttype := TT_SKIP; }
+   |  MLCOMMENT { _ttype := TT_SKIP; }
+   ;
+
+// --------------------------------------------------------
+// SLCOMMENT
+// --------------------------------------------------------
+protected SLCOMMENT
+   :
+      "//"
+      ( ~( '\r' | '\n') )*
+      (
+           '\r' '\n' {  newLine; }
+         | '\r'      {  newLine; }
+         | '\n'      {  newLine; }
+      )
+   ;
+
+
+
+
+
+
+// --------------------------------------------------------
+// Multi line comment version
+// Nested comments aren't allowed!
+// --------------------------------------------------------
+protected MLCOMMENT
+   :
+      "(*"
+      (
+         options
+         {
+            greedy = false;
+         }
+         :  '\r' '\n'   {  newLine; }
+         |  '\r'        {  newLine; }
+         |  '\n'        {  newLine; }
+         |  .
+      )*
+      "*)"
+   ;
+
+// --------------------------------------------------------
+// Numbers
+// --------------------------------------------------------
+protected   DNUMBER: '0'..'9' (DDIGIT)*;
+protected   DDIGIT:  '0'..'9';
+
+// --------------------------------------------------------
+// WS
+// --------------------------------------------------------
+WS
+   :
+      (
+           ' '
+         | '\t'      { tab;         }
+         | '\r' '\n' { newLine;     }
+         | '\r'      { newLine;     }
+         | '\n'      { newLine;     }
+      )
+      {
+         _ttype := TT_SKIP;
+      }
+   ;
+
+
+
+
+
+
+
+// --------------------------------------------------------
+// WS_LOOP
+// --------------------------------------------------------
+protected
+WS_LOOP
+   :
+      (
+         options
+         {
+            greedy = true;
+         }
+         : WS
+         | COMMENT
+      )*
+   ;
+
+// --------------------------------------------------------
+// Esc
+// --------------------------------------------------------
+protected
+ESC
+   : '\\'! ( 'r' | 'n' | 't' |  '\'' | '"' )
+   ;
+
+\end{verbatim}
+
+
+\clearpage \section{Parser}
+\begin{verbatim}
+unit dpgDpgParser;
+
+parser TdpgDpgParser;
+options
+{
+   defaultErrorHandler  = false;
+   importVocab          = dpgDpgLexer;
+   k                    = 2;
+}
+
+// --------------------------------------------------------
+// grammar
+// --------------------------------------------------------
+grammar
+   :  "unit" id SEMI
+      (usesDecl)?
+      (constDecl)?
+      (typeDecl)?
+      classDecl
+   ;
+
+// --------------------------------------------------------
+// usesDecl
+// --------------------------------------------------------
+usesDecl
+   :  USES
+      (
+         TOKENREF SEMI
+      |  RULEREF  SEMI
+      )*
+
+      RCURLY
+   ;
+
+// --------------------------------------------------------
+// constDecl
+// --------------------------------------------------------
+constDecl
+   :  "const" ACTION
+   ;
+
+// --------------------------------------------------------
+// typeDecl
+// --------------------------------------------------------
+typeDecl
+   :  "type" ACTION
+   ;
+
+// --------------------------------------------------------
+// classDecl
+// --------------------------------------------------------
+classDecl
+local
+{
+   grType: integer;
+}
+   :
+      // --------------------------------------------------
+      // Determine parser type
+      // --------------------------------------------------
+      (  "lexer"        { grType := 0; }
+      |  "parser"       { grType := 1; }
+      )
+
+      // --------------------------------------------------
+      // get class name
+      // --------------------------------------------------
+      id
+      SEMI
+
+      // --------------------------------------------------
+      // Process optional class "options {...}" clause
+      // --------------------------------------------------
+      (classOptions)?
+
+      // --------------------------------------------------
+      // Process optional class "tokens {...}" clause
+      // But only for lexers.
+      // --------------------------------------------------
+      ( {grType=0}? classTokens)?
+
+      // --------------------------------------------------
+      // Process optional class "memberDecl {...}" clause
+      // --------------------------------------------------
+      (classMemberDecl)?
+
+      // --------------------------------------------------
+      // Well, the rules
+      // --------------------------------------------------
+      rules
+
+      // --------------------------------------------------
+      // Process optional class "memberDecl {...}" clause
+      // --------------------------------------------------
+      (classMemberDef)?
+   ;
+// --------------------------------------------------------
+// classOptions
+// --------------------------------------------------------
+classOptions
+   :  OPTIONS ( id ASSIGN optionValue SEMI )* RCURLY
+   ;
+
+// --------------------------------------------------------
+// classTokens
+// --------------------------------------------------------
+classTokens
+   :
+      TOKENS
+      (
+         TOKENREF  SEMI
+      |  STRINGLIT SEMI
+      )*
+
+      RCURLY
+   ;
+
+// --------------------------------------------------------
+// classMemberDecl
+// --------------------------------------------------------
+classMemberDecl
+   :  "memberDecl" ACTION
+   ;
+
+// --------------------------------------------------------
+// classMemberDef
+// --------------------------------------------------------
+classMemberDef
+   :  "memberDef" ACTION
+   ;
+
+// --------------------------------------------------------
+// rules
+// --------------------------------------------------------
+rules
+   :  (rule)*
+   ;
+
+// --------------------------------------------------------
+// ruleExceptionBlock
+// --------------------------------------------------------
+ruleExceptionBlock
+   :  "except"    ACTION
+   |  "finally"   ACTION
+   ;
+// --------------------------------------------------------
+// altExceptionBlock
+// --------------------------------------------------------
+altExceptionBlock
+   :  "except"    ACTION
+   |  "finally"   ACTION
+   ;
+
+// --------------------------------------------------------
+// rule
+// --------------------------------------------------------
+rule
+   :
+      // --------------------------------------------------
+      // Parse rule scope
+      // --------------------------------------------------
+      (  "public"
+      |  "protected"
+      |  "private"
+      )?
+
+      // --------------------------------------------------
+      // Parse rule name
+      // --------------------------------------------------
+      id
+
+      // --------------------------------------------------
+      // Optional arguments
+      // --------------------------------------------------
+      (ARGACTION)?
+
+      // --------------------------------------------------
+      // Optional return type
+      // --------------------------------------------------
+      ("returns" ARGACTION)?
+
+      // --------------------------------------------------
+      // Optional rule options
+      // --------------------------------------------------
+      (ruleOptions)?
+
+      // --------------------------------------------------
+      // Optional rule local variable declarations
+      // --------------------------------------------------
+      ("local" ACTION)?
+
+      // --------------------------------------------------
+      // Optional rule init action
+      // --------------------------------------------------
+      (ACTION)?
+
+      // --------------------------------------------------
+      // Rule block
+      // --------------------------------------------------
+      COLON
+      block
+      SEMI
+
+      // --------------------------------------------------
+      // Optional exception handler
+      // --------------------------------------------------
+      (ruleExceptionBlock)?
+   ;
+
+// --------------------------------------------------------
+// block
+// --------------------------------------------------------
+block
+   :  alternative (OR alternative)*
+   ;
+
+// --------------------------------------------------------
+// alternative
+// --------------------------------------------------------
+alternative
+   :  (elem)*
+      (altExceptionBlock)?
+   ;
+
+// --------------------------------------------------------
+// elem
+// --------------------------------------------------------
+elem
+   : element
+   ;
+
+// --------------------------------------------------------
+// element
+// --------------------------------------------------------
+element
+local
+{
+   assignLabel : IdpgToken;
+}
+{
+   assignLabel := nil;
+}
+   :
+      (
+          id ASSIGN
+         (id COLON)?
+         (
+            RULEREF  (ARGACTION)? (BANG)?
+         |  TOKENREF (ARGACTION)?
+         )
+      )
+   |
+      (assignLabel=id COLON)?
+      (
+         RULEREF (ARGACTION)? (BANG)?
+      |  range[assignLabel]
+      |  terminal[assignLabel]
+      |  NOT (notTerminal[assignLabel] | ebnf[ assignLabel, true])
+      |  ebnf[ assignLabel, false]
+      )
+   |  ACTION
+   |  SEMPRED
+   ;
+
+// --------------------------------------------------------
+// range
+// --------------------------------------------------------
+range [pTokenLabel: IdpgToken]
+local
+   :
+      CHARLIT RANGE CHARLIT
+   |  (TOKENREF | STRINGLIT) RANGE (TOKENREF | STRINGLIT)
+   ;
+// --------------------------------------------------------
+// terminal
+// --------------------------------------------------------
+terminal [pTokenLabel: IdpgToken]
+   :
+      CHARLIT    (BANG)?
+   |  TOKENREF   (BANG)? (ARGACTION)?
+   |  STRINGLIT  (BANG)?
+   |  WILDCARD   (BANG)?
+   ;
+
+// --------------------------------------------------------
+// notTerminal
+// --------------------------------------------------------
+notTerminal [pTokenLabel: IdpgToken]
+   :  CHARLIT  (BANG)?
+   |  TOKENREF (BANG)?
+   ;
+
+// --------------------------------------------------------
+// ebnf
+// --------------------------------------------------------
+ebnf [pTokenLabel: IdpgToken; pTokenNot: boolean]
+   :  LPAREN
+      (
+            subRuleOptions (ACTION)? COLON
+         |  ACTION COLON
+      )?
+
+      block
+      RPAREN
+      (  QUEST
+      |  STAR
+      |  PLUS
+      |  IMPLIES
+      )?
+   ;
+
+// --------------------------------------------------------
+// subruleOptions
+// --------------------------------------------------------
+subruleOptions
+   :  OPTIONS (id ASSIGN optionValue)* SEMI RCURLY
+   ;
+
+// --------------------------------------------------------
+// ruleOptions
+// --------------------------------------------------------
+ruleOptions
+   :  OPTIONS (id ASSIGN optionValue)* SEMI RCURLY
+   ;
+
+// --------------------------------------------------------
+// optionValue
+// --------------------------------------------------------
+optionValue returns [IdpgToken]
+   :  result=qualifiedId
+   |  result:STRINGLIT
+   |  result:CHARLIT
+   |  result:INTEGER
+   ;
+
+// --------------------------------------------------------
+// qualifiedId
+// --------------------------------------------------------
+qualifiedId returns [IdpgToken]
+   :  id (WILDCARD id)*
+   ;
+// --------------------------------------------------------
+// id
+// --------------------------------------------------------
+id returns [IdpgToken]
+   :  result:TOKENREF
+   |  result:RULEREF
+   ;
+\end{verbatim}
@@ -0,0 +1,77 @@
+\section{Error handling}
+
+All syntactic and semantic errors cause parser exceptions to be thrown. In
+particular, the methods used to match tokens in the parser base class (match et
+al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to
+match characters (match et al) throw analogous exceptions.
+
+\subsection{DPG exception hierarchy}
+
+DPG-generated parsers throw exceptions to signal recognition errors or other
+stream problems. All exceptions derive from EdpgException. The hierarchy is the
+following:
+
+\begin{verbatim}
+   EdpgException
+      EdpgMismatchedChar
+      EdpgMismatchedToken
+      EdpgSemantic
+\end{verbatim}
+
+\subsubsection{EdpgException} The EdpgException exception class is the base of
+all DPG generated exceptions. User defined exceptions must derive from this
+class.
+
+\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it
+is looking for a character, but finds a different one on the input stream.
+
+\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when
+it is looking for a token, but finds a different one on the input token stream.
+
+\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic
+predicate.
+
+\subsection{Specifying exception handlers}
+
+DPG allows to specify specific exception handler to a given rule or
+alternative. The general form of an exception handler specification is:
+
+\begin{verbatim}
+   ... except  { code to handle exception }
+   ... finally { code to handle exception }
+\end{verbatim}
+
+\subsubsection{Exception handler for a rule}
+
+The exception handler for a rule must be placed after the terminating
+semicolon. The handler can be either an §except§ block or a §finally§ block.
+The implementation of rule will be surrounded by a try block.
+
+\begin{verbatim}
+   r : ...
+     ;
+     except { handler code }
+\end{verbatim}
+
+\subsubsection{Exception handler for an alternative}
+
+The exception handler of an alternative must be the last element of the
+alternative. Both exception handler blocks can be used. Every alternative that
+have exception block specified, will be surrounded by a §try...except/finally§
+block.
+
+\begin{verbatim}
+   r : alternative_1 ... except  { handler code }
+     | alternative_2 ... finally { handler code }
+       ...
+     | alternative_n
+     ;
+\end{verbatim}
+
+\paragraph{Note:} It is not necessary to define exception handler for each alternative.
+
+\subsubsection{Default error handler in lexer}
+
+To skip every character that isn't recognized by any public lexer rule, specify
+§filter=true§ option for a lexer. That way, the parser doesn't have to deal
+with lexical errors and ask for another token.
@@ -0,0 +1,53 @@
+\chapter{Grammars}
+\minitoc \clearpage
+
+\section{Structure of a grammar}
+
+The generic structure of a DPG grammar is the following:
+\begin{itemize}
+    \item \emph{unit declaration}
+    \item \emph{unit sections}
+    \item \emph{grammar class definition}
+    \item \emph{grammar class sections}
+\end{itemize}
+\paragraph{Note:} the order of blocks cannot be changed!
+
+\subsection{Unit declaration}
+The $unit$~$declaration$ is always the first block in any DPG grammar. It
+specifies the name of the target Pascal unit generated by DPG from the
+grammar. The syntax is identical to that of Delphi.
+\begin{alltt}
+   \textbf{unit} \emph{UnitName} ;
+\end{alltt}
+
+\subsection{Unit sections}
+The $unit$~$sections$ block must follow the $unit$~$declaration$
+block if it exists. The members of this block are optional, but
+they must appear in the following order:
+\begin{itemize}
+    \item \emph{uses section}
+    \item \emph{const section}
+    \item \emph{type section}
+\end{itemize}
+
+\subsection{Grammar class definition}
+This block defines the type of the grammar class. The possible types are
+§lexer§ and §parser§.
+\begin{alltt}
+   \textbf{lexer} \emph{myLexer} ;  // define lexer
+\end{alltt}
+or
+\begin{alltt}
+   \textbf{parser} \emph{myParser} ;  // define parser
+\end{alltt}
+
+\subsection{Grammar class sections}
+This block may contain the following sections in the order
+specified:
+\begin{itemize}
+    \item \emph{options section}
+    \item \emph{tokens section} (only for lexers)
+    \item \emph{memberdecl section}
+    \item \emph{rule definitions}
+    \item \emph{memberdef section}
+\end{itemize}
@@ -0,0 +1,63 @@
+\chapter{Introduction}
+\minitoc \clearpage
+
+\section{Overview}
+The Delphi Parser Generator is a language tool which automatically
+generates $LL(k)$ parsers in Object Pascal Language based on an
+intuitive grammar, similar to §EBNF§. The generated code mimics a
+hand-written parser, so that it is easier to debug and leads to
+shortened development time compared to state-machine based $LR$ or
+DFA/NFA parsers. To compensate theoretical limitations of $LL(k)$
+parsers, DPG features several powerful extensions enhancing its
+functionality far beyond that of standard $LL(k)$ parsers. The
+method of syntactic and semantic predicates makes the writing of
+meta-parsers simple and routine. The philosophy of DPG is to allow
+the programmer maximum control over the parsing process while
+eliminating all the routine work.
+
+\section{Features}
+\begin{itemize}
+    \item[-] Delphi code generator for $LL(k)$ lexers and parsers.
+    \item[-] Intuitive and consistent EBNF like syntax for both the lexer and the parser generator
+             resulting in a shallow learning curve.
+    \item[-] Extremely easy-to-read generated code undistinguishable from hand-written
+             parsers. The inlined statements are properly indented relative to the surrounding
+             program code.
+    \item[-] Syntactic predicates allow for conditional parsing based on
+             formal syntactic conditions, enhancing the functionality of the $LL(k)$ parsers
+             considerably.
+    \item[-] Semantic predicates allow for conditional parsing based on
+             essentially arbitrary conditions. For example, a DOM-based XML parser is easily
+             written by semantic predicates using an internal hash-table representation of
+             the DOM. Using traditional state-machine based parsers (like §YACC§), programmers
+             often need to delegate parsing tasks to the hand written part of the code. This
+             burdens them with laborious and error-prone routine work. Semantic predicates
+             prevent this, since the parser is allowed to use run-time information for the
+             parsing process dynamically.
+    \item[-] Actions can be inserted in the rules at every possible place. These actions can be
+             used for controlling the parsing process with high granularity.
+    \item[-] All rules may have return values and arguments. Rule arguments add a powerful
+             metaparsing capability completing the predicate and action mechanism optimally.
+    \item[-] All rules may have a code initialization section. This special feature is tuned
+             for Pascal to allow the programmer to declare and initialize local variables for each rule.
+    \item[-] Many convenient extensions to the plain §BNF§ syntax, such as §(...)§, §(...)?§,
+             §(...)+§, §(...)*§, which simplify the task of writing grammars and makes it less
+             error-prone.
+    \item[-] Element complements allow for matching a text not matching a given rule.
+    \item[-] Element labels are used to directly map rule information
+             to Pascal variables. They provide a seamless interaction between the
+             generated and user-written code.
+    \item[-] Intuitive Graphical User Interface with syntax highlighting, and
+             project management capabilities.
+\end{itemize}
+
+\section{Installation}
+The first step in using DPG is to install it in Delphi. However, before using
+DPG be sure to read over the License Agreement.
+\begin{itemize}
+    \item[-] run setup.exe and follow the instructions
+    \item[-] run Delphi and add your DPG run-time library directory to Delphi's
+    library path. For example, to do this for Delphi 6 select \emph{Tools} §|§ \emph{Environment Options}
+    on the menu bar. Go to the \emph{Library} tab and add the full path of your DPG run-time directory
+    to the \emph{Library Path} if you have not already done so.
+\end{itemize}
@@ -0,0 +1,40 @@
+\section{Atomic production elements}
+\subsection{Character literal}
+Single characters enclosed in quotes are character literals. A
+character literal can only be referred to within a lexer rule. For
+example, §'{'§ needs not be escaped as you are specifying the
+literal character which is to be matched. Meta symbols are used
+outside of characters and string literals to specify lexical
+structure. Special characters can be specified in a similar way to
+§C§ escape sequences. DPG accepts the following escape sequences:
+§\n§, §\r§, §\t§, §\'§, §\"§, §\\§. The §#xx§ form is not accepted
+by DPG.
+
+\subsection{String literal}
+String literals are sequences of characters enclosed in double quotes. The same
+escape sequences can be used in string literals as in character literals.
+In parser rules, strings represent tokens, and each unique string is assigned
+to a token type. Referring to a string within a lexer rule matches the
+indicated sequence of characters and is a shorthand notation. For example,
+consider the following equivalent lexer rule definitions:
+\begin{verbatim}
+   BEGIN : "begin";
+   BEGIN : 'b' 'e' 'g' 'i' 'n';
+\end{verbatim}
+
+\subsection{Wildcard}
+The wildcard §.§ within a parser rule matches any single token;
+within a lexer rule it matches any single character.
+
+\subsection{Token reference}
+Identifiers beginning with an uppercase letter are treated as
+token references. The subsequent characters may be a mixture of
+letters, digits or underscores. Referencing a token in a parser
+rule implies that you want to recognize a token with the specified
+token type. This does not actually call the associated lexer rule
+-- the lexical analysis phase delivers a stream of tokens to the
+parser. A token reference within a lexer rule implies a method
+call to that rule, and carries the same analysis semantics as a
+rule reference within a parser. So, you may specify rule arguments
+and return values for non-public tokens and for every parser rule.
+See the next section on rule references.
@@ -0,0 +1,77 @@
+\section{Error handling}
+
+All syntactic and semantic errors cause parser exceptions to be thrown. In
+particular, the methods used to match tokens in the parser base class (match et
+al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to
+match characters (match et al) throw analogous exceptions.
+
+\subsection{DPG exception hierarchy}
+
+DPG-generated parsers throw exceptions to signal recognition errors or other
+stream problems. All exceptions derive from EdpgException. The hierarchy is the
+following:
+
+\begin{verbatim}
+   EdpgException
+      EdpgMismatchedChar
+      EdpgMismatchedToken
+      EdpgSemantic
+\end{verbatim}
+
+\subsubsection{EdpgException} The EdpgException exception class is the base of
+all DPG generated exceptions. User defined exceptions must derive from this
+class.
+
+\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it
+is looking for a character, but finds a different one on the input stream.
+
+\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when
+it is looking for a token, but finds a different one on the input token stream.
+
+\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic
+predicate.
+
+\subsection{Specifying exception handlers}
+
+DPG allows to specify specific exception handler to a given rule or
+alternative. The general form of an exception handler specification is:
+
+\begin{verbatim}
+   ... except  { code to handle exception }
+   ... finally { code to handle exception }
+\end{verbatim}
+
+\subsubsection{Exception handler for a rule}
+
+The exception handler for a rule must be placed after the terminating
+semicolon. The handler can be either an §except§ block or a §finally§ block.
+The implementation of rule will be surrounded by a try block.
+
+\begin{verbatim}
+   r : ...
+     ;
+     except { handler code }
+\end{verbatim}
+
+\subsubsection{Exception handler for an alternative}
+
+The exception handler of an alternative must be the last element of the
+alternative. Both exception handler blocks can be used. Every alternative that
+have exception block specified, will be surrounded by a §try...except/finally§
+block.
+
+\begin{verbatim}
+   r : alternative_1 ... except  { handler code }
+     | alternative_2 ... finally { handler code }
+       ...
+     | alternative_n
+     ;
+\end{verbatim}
+
+\paragraph{Note:} It is not necessary to define exception handler for each alternative.
+
+\subsubsection{Default error handler in lexer}
+
+To skip every character that isn't recognized by any public lexer rule, specify
+§filter=true§ option for a lexer. That way, the parser doesn't have to deal
+with lexical errors and ask for another token.
@@ -0,0 +1,265 @@
+\section{Options}
+
+The §options{...}§ section is used to specify options for grammar
+elements. i.e. elements are the lexer/parser classes, rules and
+subrules. This section is preceded by the options keyword and
+contains a series of option/value assignments surrounded by curly
+braces.
+
+\subsection{k}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{synopsis:}    & set lookahead depth                   \\
+     \emph{context:}     & parser/lexer class declaration        \\
+     \emph{type:}        & integer                               \\
+     \emph{default:}     & 1
+  \end{tabular}
+\end{table}
+
+For any grammar, the lookahead depth can be specified by using the $k$ option.
+
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      k = 2;
+   }
+\end{verbatim}
+
+Setting the lookahead depth changes the maximum number of tokens that will be
+examined to select alternative productions, and test for exit conditions of the
+§EBNF§ constructs §(...)?§, §(...)+§, and §(...)*§. The lookahead analysis is
+linear approximate (as opposed to full $LL(k)$ ). Consider this example with
+$k=2$:
+\begin{verbatim}
+  r  : ( A B | B A )
+     |  A A
+     ;
+\end{verbatim}
+
+Full $LL(k)$ analysis would resolve the ambiguity and produce a
+lookahead test for the first alternative like:
+\begin{verbatim}
+   if (LA(1)=A and LA(2)=B) or (LA(1)=B and LA(2)=A)
+\end{verbatim}
+
+Linear approximate analysis would logically OR the lookahead sets at each
+depth, resulting in a test like:
+
+\begin{verbatim}
+   if (LA(1)=A or LA(1)=B) and (LA(2)=A or LA(2)=B)
+\end{verbatim}
+
+Which is ambiguous for the second alternative for §{A,A}§.
+Therefore, setting the lookahead depth very high tends to yield
+diminishing returns in most cases, because the lookahead sets at
+large depths will include almost everything. This problem can be
+solved using a syntactic predicate.
+
+
+\subsection{importVocab}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{synopsis:}    & set initial grammar vocabulary       \\
+     \emph{context:}     & parser/lexer class declaration       \\
+     \emph{type:}        & ID                                   \\
+     \emph{default:}     & none
+  \end{tabular}
+\end{table}
+
+The import vocabulary for a grammar class can be specified using the
+§importVocab§ option.
+
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      importVocab = XML;
+   }
+\end{verbatim}
+
+DPG will look for the token exchange file named §XMLTokens.txt§,
+and import all the token definitions from it. Parser grammar must
+use this option, because without that, it cannot communicate with
+the lexer. Lexer grammar can use this option too. It is useful,
+when a parser class uses multiple lexers to get tokens from the
+input stream. The vocabulary file has an identifier on the first
+line that names the token vocabulary. All subsequent lines are of
+the form §ID=value§ or §ID="literal"=value§. For example:
+
+\begin{verbatim}
+   ThocLexer
+   TT_EOF = 1
+   TT_LPAREN = 4
+   TT_RPAREN = 5
+   LT_const = "const" = 6
+\end{verbatim}
+
+The token exchange file is automatically generated by DPG for each grammar.
+\paragraph{Note:} you must take care of the order of grammars in a DPG project.
+Vocabulary-generating grammars must appear before vocabulary-consuming
+grammars.
+
+\subsection{exportVocab}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{synopsis:}    & set export grammar vocabulary        \\
+     \emph{context:}     & parser/lexer class declaration       \\
+     \emph{type:}        & ID                                   \\
+     \emph{default:}     & grammar class name
+  \end{tabular}
+\end{table}
+
+The vocabulary of a grammar is the union of the set of tokens provided by an
+§importVocab§ option and the set of tokens and literals defined in the grammar.
+
+\begin{verbatim}
+   lexer myParser;
+   options
+   {
+      exportVocab = XML1;
+   }
+\end{verbatim}
+
+If the exportVocab options isn't specified, then DPG will use the
+grammar class name to export the vocabulary. DPG generates the
+following files for the examp\-le above: §XML1Tokens.txt§ for
+token exchange, and XML1Tokens.pas for the grammar class.
+
+\subsection{testLiterals}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{context:}     & lexer class declaration, lexer rule  \\
+     \emph{type:}        & boolean                              \\
+     \emph{default:}     & false
+  \end{tabular}
+\end{table}
+
+By default, DPG doesn't generate code to check the literals table
+(the table generated for literal strings), because checking the
+literals table after each token recognition is expensive. Instead,
+it checks string literals in a lexer rule, that can recognize
+them. The string literals table contains the strings defined in
+the §tokens{...}§ section of a lexer grammar.
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      testLiterals = false;
+   }
+   tokens
+   {
+      "function";
+      "procedure";
+      ...
+   }
+
+   ID
+   options
+   {
+      testLiterals = true;
+   }
+      : (‘A’..’Z’ | ‘a’..’z’)(‘A’..’Z’ | ‘a’..’z’ | ‘0’..‘9’)*
+      ;
+\end{verbatim}
+
+In the example above, if the input is matched by the rule §ID§
+then the implementation of the rule will check the literals table
+for the matched token. If it exists, then the returned token type
+will be set to the token type assigned to the string literal in
+the literals table. Otherwise the returned token type will remain
+unchanged.
+
+It is possible to check the literals table explicitly within an
+action using the Test\-Li\-te\-ral method:
+
+\begin{verbatim}
+   {
+      ...
+      _ttype := TestLiteral;
+      _ttype := TestLiteral( _ttype);
+      ...
+   }
+\end{verbatim}
+
+\subsection{caseSensitive}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{context:}     & lexer class declaration              \\
+     \emph{type:}        & boolean                              \\
+     \emph{default:}     & false
+  \end{tabular}
+\end{table}
+
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      caseSensitive = true;
+   }
+\end{verbatim}
+
+Case is ignored when comparing against character and string literals in the
+lexer. The case of the input stream is maintained when stored in the token
+objects.
+
+\subsection{filter}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{context:}     & lexer class declaration              \\
+     \emph{type:}        & boolean / ID                         \\
+     \emph{default:}     & false
+  \end{tabular}
+\end{table}
+
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      filter = true;
+   }
+\end{verbatim}
+
+When §true§, the lexer ignores any input not exactly matching one of the public
+lexer rules.
+
+Notice that the filter rule must track new-lines in the general
+case where the lexer might emit error messages.
+
+When set to a rule name, the filter rule is invoked either when the lookahead
+(in nextToken) predicts none of the public lexical rules or when one of those
+rules fails.  In the latter case, the input is rolled back before attempting
+the filter rule. Option §filter=true§ is like having a filter rule such as:
+
+\begin{verbatim}
+   IGNORE : . ;
+\end{verbatim}
+
+\subsection{ignore}
+\begin{table}[H]
+  \small
+  \begin{tabular}{rl}
+     \emph{context:}     & lexer rule              \\
+     \emph{type:}        & ID                      \\
+     \emph{default:}     & none
+  \end{tabular}
+\end{table}
+
+\begin{verbatim}
+   lexer myLexer;
+   options
+   {
+      ignore = MyIgnoreRule;
+   }
+\end{verbatim}
+
+Specify a lexer rule to use a white space between lexical rule
+atomic elements (chars, strings, and rule references). The grammar
+analysis, and hence the look\-ahead sets, are aware of the
+whitespace references.
@@ -0,0 +1,41 @@
+\section{Production element operators}
+
+\subsection{Element complement}
+The unary not operator $\sim$ may be applied to an atomic element
+such as a token identifier. For some token atom §T§, $\sim$§T§
+matches any token other than §T§ except end-of-file. Within lexer
+rules, $\sim$§'a'§ matches any character other than character
+§'a'§. The sequence $\sim$§.§ (``not anything'') is meaningless
+and not allowed. Example:
+\begin{verbatim}
+   SL_COMMENT : "//" (~'\n')* '\n';
+\end{verbatim}
+
+\subsection{Set complement}
+The unary not operator $\sim$ can also be used to construct a
+token set or character set by complementing another set. This is
+most useful when you want to match tokens or characters until a
+certain delimiter set is encountered. Rather than invent a special
+syntax for such sets, DPG allows the placement of $\sim$ in front
+of a subrule containing only simple elements and no actions. The
+simple elements may be token references, token ranges, character
+literals, or character ranges. For example:
+\begin{verbatim}
+   SL_COMMENT : "//" (~('\r'|'\n'))* ('\r'|'\n');
+\end{verbatim}
+
+\subsection{Range operator}
+The binary range operator §..§ is used to define a range of atom
+which may be matched. The expression §c1..c2§ in a lexer matches
+characters included in that range. The expression §T..U§ in a
+parser matches any token whose token type is inclusively in that
+range, which is of dubious value if the token types are generated
+externally.
+
+\subsection{Ignore operator}
+In lexer grammars, the ignore operator §!§ can be applied to any
+atomic production element. It means that the element followed by
+the §!§ operator should not appear in the result token. Example:
+\begin{verbatim}
+   STRING : '"'! (~'"')* '"'! ;
+\end{verbatim}
@@ -0,0 +1,82 @@
+\section{Element labels}
+
+Any atomic production element can be labeled by an identifier (case is insignificant).
+For a labelled atomic element, the identifier is used within a semantic action to access
+the associated Token object or character. For example,
+
+\begin{verbatim}
+   assign
+      : v:ID EQUALS expr SEMI
+        {
+           writeln(‘Assign to ‘ + v.TokenText);
+        }
+      ;
+\end{verbatim}
+
+\section{EBNF rule elements}
+
+DPG supports the following extended BNF notations:
+\begin{table}[H]
+  \small
+  \begin{tabular}{ll}
+%     \hline
+     §(...) §     & -- exactly one occurrence of a subrule   \\
+     §(...)?§     & -- zero or one occurrence of a subrule   \\
+     §(...)+§     & -- one or more occurrence of a subrule   \\
+     §(...)*§     & -- zero or more occurrence of a subrule
+%     \hline
+  \end{tabular}
+\end{table}
+
+\section{Rule arguments}
+Character sequences in square brackets are arguments or return type specifiers.
+Square brackets within string and character literals are not argument
+delimiters. The arguments within §[]§ must follow the Object Pascal syntax.
+
+\section{Exception handlers}
+
+DPG allows the specification of exception handlers specific to a
+given rule or alternative. The general form of an exception
+handler specification is:
+
+\begin{verbatim}
+   ... except  { code to handle exception }
+   ... finally { code to handle exception }
+\end{verbatim}
+
+\subsection{Exception handler for a rule}
+
+The exception handler for a rule must be placed after the
+terminating semicolon. The handler can be either an §except§ block
+or a §finally§ block. The implementation of a rule will be
+surrounded by a try block.
+
+\begin{verbatim}
+   r : ...
+     ;
+     except { handler code }
+\end{verbatim}
+
+\subsection{Exception handler for an alternative}
+
+The exception handler of an alternative must be the last element
+of the alternative. Both exception handler blocks can be used.
+Every alternative that has an exception block will be surrounded
+by a §try...except/finally§ block.
+
+\begin{verbatim}
+   r : alternative_1 ... except  { handler code }
+     | alternative_2 ... finally { handler code }
+       ...
+     | alternative_n
+     ;
+\end{verbatim}
+
+\paragraph{Note:} It is not necessary to define an exception handler for each alternative.
+
+\subsection{Default error handler in lexer}
+
+To skip every character that isn't recognized by any public lexer
+rule, specify the option §filter=true§ for a lexer. That way, the
+parser doesn't have to deal with lexical errors and ask for
+another token.
@@ -0,0 +1,249 @@
+\section{Sections}
+
+\subsection{unit}
+The unit section specifies the unit name of the generated source file.
+The syntax is identical to Object Pascal.
+
+\subsection{uses}
+The §uses{...}§ section is used to specify the units which must be
+included in the interface's uses clause of the generated pascal
+unit. Every unit name must be terminated by a semicolon. Repeated
+units are included only once.
+\begin{verbatim}
+   uses
+   {
+      Classes;
+      Windows;
+   }
+\end{verbatim}
+
+
+\subsection{const}
+The §const{...}§ section is used to specify items that appear in
+the interface's const clause of the generated pascal unit. The
+content of this section is copied verbatim into the unit.
+\begin{verbatim}
+   const
+   {
+      const1 = 12;
+      const2 = ‘FOO’;
+   }
+\end{verbatim}
+
+\subsection{type}
+The §type{...}§ section is used to specify items that appear in
+the interface's type clause of the generated pascal unit. The
+content of this section is copied verbatim into the unit.
+\begin{verbatim}
+   type
+   {
+      TmyType1 = integer;
+      TmyType2 = array [0..16] of TmyType1;
+   }
+\end{verbatim}
+
+\subsection{options}
+The §options{...}§ section contains options for a given grammar
+element. Options can be defined for lexer/parser classes, rules
+and subrules.
+
+\subsection{tokens}
+If you need to define an ``imaginary'' token (i.e. one that has no
+corresponding real input symbol) use the §tokens{...}§ section to
+define them. You can also define literals in this section.
+
+\begin{verbatim}
+    tokens
+    {
+       "procedure";
+       "function";
+       INTEGER;
+    }
+\end{verbatim}
+
+Strings defined in this way are treated just as if you had referenced them in
+the parser. The formal syntax is:
+
+\begin{verbatim}
+   tokenSpecification
+      : "tokens"
+        LCURLY
+        (tokenItem SEMI)*
+        RCURLY
+      ;
+
+   tokenItem
+      : TOKEN
+      | STRING
+      ;
+\end{verbatim}
+
+The §tokens{...}§ section is only valid in lexer grammars.
+
+\subsection{memberdecl}
+The §memberdecl{...}§ section contains additional member
+declarations for the grammar class. It allows the expansion of the
+grammar class with user defined members, so it is not necessary to
+derive new classes from the generated class to implement
+additional functionality. The content of this section is copied
+verbatim into the class declaration of the generated grammar
+class.
+\begin{verbatim}
+   memberdecl
+   {
+      procedure proc1;
+      procedure proc2;
+   }
+\end{verbatim}
+
+\subsection{memberdef}
+The §memberdef{...}§ section contains the implementation of the
+classes' additional functionality. The content of this section is
+copied verbatim into the implementation part of the generated
+unit. This section may also contain the initialization and
+finalization clauses.
+
+\begin{verbatim}
+   memberdef
+   {
+      procedure TmyClass.proc1;
+      begin
+      ...
+      end;
+
+      procedure TmyClass.proc2;
+      begin
+      ...
+      end;
+   }
+\end{verbatim}
+
+\subsection{parser}
+Parser rules must be associated with a parser class. Each parser
+class specification precedes the options, and rule definitions of
+the parser. Grammar files §.g§ can hold only one class definition.
+A parser specification in a grammar file looks like:
+\begin{verbatim}
+   unit myParser;
+   uses...           // optional uses {...} section
+   const...          // optional const {...} section
+   type...           // optional type {...} section
+
+   parser TmyParser;
+
+   options...        // optional options {...} section
+   memberdecl...     // optional memberdecl {...} section
+   parser rules...
+   memberdef...      // optional memberdef {...} section
+\end{verbatim}
+
+In the generated code, the parser class results in an Object
+Pascal class, and the rules become member methods of the class.
+
+Note, that the content of the §memberdecl{...}§ section is copied
+verbatim into the class declaration part of the generated parser
+class while the content of the §memberdef{...}§ section is copied
+after the implementation of the member rules, so the
+initialization and finalization clauses of a pascal unit can be
+placed in the §memberdef{...}§ section.
+
+\subsection{lexer}
+To perform lexical analysis, you need to specify a lexer class that describes
+how to break up the input character stream into a stream of tokens. The syntax
+is similar to that of a parser class:
+\begin{verbatim}
+   unit myLexer;
+   uses...           // optional uses {...} section
+   const...          // optional const {...} section
+   type...           // optional type {...} section
+
+   lexer TmyLexer;
+
+   options...        // optional options {...} section
+   tokens...         // optional tokens {...} section
+   memberdecl...     // optional memberdecl {...} section
+   lexer rules...
+   memberdef...      // optional memberdef {...} section
+\end{verbatim}
+
+Lexical rules contained within a lexer class become member methods in the
+generated class. A lexer grammar may have a §tokens{...}§ section to specify
+imaginary tokens and string literals.
+
+\subsection{rule definitions}
+The structure of an input stream of atoms is specified by a set of
+mutually-referenced rules. Each rule has a name and any of the
+following optional attributes: a scope specifier; a set of
+arguments; an init-action; a return value; local variable
+definitions; an exception handler and an alternative or
+alternatives. Each alternative contains a series of elements that
+specify what to match and where. Scope can be specified by
+private, protected, or public keywords. A rule has public scope by
+default. The basic form of a rule is:
+\begin{verbatim}
+   (scope) rulename
+      :  alternative_1
+      |  alternative_2
+         ...
+      |  alternative_n
+      ;
+\end{verbatim}
+
+Parameters for a rule can be specified in the following form:
+\begin{verbatim}
+   rulename [formal parameters] : ... ;
+\end{verbatim}
+
+If the rule returns a value, it’s type can be defined with the
+returns keyword:
+\begin{verbatim}
+   rulename returns [typename] : ... ;
+\end{verbatim}
+
+where §typename§ is a valid Object Pascal type specifier.
+
+Local variables for a rule can be defined in the §local{...}§ section:
+
+\begin{verbatim}
+   rule
+   local
+   {
+      foo: integer;
+      bar: string;
+   }
+\end{verbatim}
+
+Init-actions are specified before the colon. Init-actions differ from normal
+actions because they are always executed regardless of guess mode.
+
+\begin{verbatim}
+   rule
+   {
+      init-action
+   }
+      : ... ;
+\end{verbatim}
+
+
+\paragraph{Parser rules} apply structure to a stream of tokens, whereas
+lexer rules apply structure to a stream of characters. Parser
+rules, therefore, must not reference cha\-rac\-ter literals.
+Double-quoted strings in parser rules are considered to be token
+references. Note: all parser rules must begin with a lowercase
+letter.
+
+\paragraph{Lexer rules} defined within a lexer grammar must have a name beginning
+with an uppercase letter. These rules implicitly match
+cha\-rac\-ters on the input stream instead of tokens on the token
+stream. Referenced grammar elements include token references
+(implicit lexer rule references), cha\-rac\-ters and strings.
+Lexer rules are processed in the same manner as parser rules, and
+may also specify arguments and return values. A scope specifier
+for a lexer rule has special meaning in lexer grammars. In the
+generated Object Pascal unit, the lexer class has a §nextToken§
+function which is the interface between the lexer and the parser.
+This function is synthesized from the public lexer rules. It means
+that non-public lexer rules don't modify the prediction logic of
+the lexer. They are usually helper rules. If the lexer grammar has
+no public rule at all, the §nextToken§ function returns EOF to the
+parser.
@@ -0,0 +1,79 @@
+\section{Simple production elements}
+\subsection{Rule reference}
+Identifiers beginning with lowercase letter are treated as parser
+rule references. The subsequent characters may be any letter,
+digit, number, or underscore. Lexical rules may not reference
+parser rules. Referencing a rule implies a method call to that
+rule at that point in the parse. You may pass parameters and
+obtain return values. For example, formal and actual parameters
+are specified within square brackets:
+\begin{verbatim}
+   function
+      : type ID LPAREN args RPAREN block [1]
+      ;
+
+   block [scope: integer]
+      : LCURLY
+        ...
+        { (* use arg 'scope' *) }
+        ...
+        RCURLY
+      ;
+\end{verbatim}
+
+Return values that are stored in variables use a simple assignment
+notation:
+\begin{verbatim}
+   set
+   local
+   {
+      ids : TStringList;
+   }
+   {
+      ids := nil;
+   }
+      :  LPAREN ids=idList RPAREN
+      ;
+
+   idList returns [TStringList]
+   {
+      result := TStringList.Create;
+   }
+      : id:ID { result.Add( id.TokenText;); }
+        (
+           COMMA id:ID
+           {
+              result.Add( id.TokenText;);
+           }
+        )*
+      ;
+\end{verbatim}
+
+\subsection{Semantic action}
+Actions are blocks of Object Pascal source code enclosed in curly braces. The
+code is executed after the preceding production element has been recognized and
+before the recognition of the following element. Actions are typically used to
+generate out\-put, construct trees, or modify a symbol table. An action's
+position dictates when it is recognized relative to the surrounding grammar
+elements.
+
+If the action is the first element of a production, it is executed
+before any other e\-le\-ment in that production, but only if that
+production is predicted by the lookahead.
+
+The first action of an §EBNF§ subrule may be followed by §:§.
+Doing so de\-sig\-na\-tes the action as an init-action and
+associates it with the subrule as a whole, instead of any
+production. It is executed immediately upon entering the subrule,
+and is executed even while guessing (testing syntactic
+predicates). For example:
+
+\begin{verbatim}
+   ( { init-action} :
+     { action of 1st production} production1
+   | { action of 2nd production} production2
+   )?
+\end{verbatim}
+
+The init-action would be executed regardless of what (if anything)
+matched in the optional subrule.
@@ -0,0 +1,49 @@
+Delphi Parser Generator (DPG) uses the ASCII character set,
+including the letters \emph{A} through \emph{Z} and \emph{a}
+through \emph{z}, the digits \emph{0} through \emph{9}, and other
+standard characters. It is case sensitive. The space character
+(ASCII 32), the tab character (ASCII 9), and the new-line
+characters (ASCII 13,10) are called \emph{white-space} characters.
+
+\section{General}
+\subsection{Comments}
+DPG accepts single and multi-line comments. Single-line comments begin with
+§//§ while multi-line (block) comments are enclosed by §(*§~and~§*)§.
+
+\subsection{White Space}
+Spaces, tabs, and new-lines (including most used §CR-LF§, §CR§,
+§LF§ constructions) are separators in that they separate DPG
+symbols, such as identifiers. White spaces have no additional
+significance i.e. the code layout does not play any semantical
+role. However the layout of the embedded Delphi code is preserved
+in the ge\-ne\-ra\-ted source files.
+
+\subsection{Symbols}
+DPG uses the following punctuation and keywords:
+
+\begin{table}[H]
+  \small
+  \begin{center}
+    \begin{tabular}{|ll|ll|}
+       \hline
+       §(...)§      & subrule               & §unit§        & unit name                 \\
+       §(...)*§     & closure subrule       & §uses§        & uses section              \\
+       §(...)+§     & positive closure      & §const§       & const section             \\
+       §(...)?§     & optional subrule      & §type§        & type section              \\
+       §[...]§      & rule arguments        & §lexer§       & lexer class               \\
+       §{...}§      & semantic action       & §parser§      & parser class              \\
+       §{...}?§     & semantic predicate    & §options§     & options section           \\
+       §(...)=>§    & syntactic predicate   & §tokens§      & tokens section            \\
+       §  |§        & alternative operator  & §returns§     & rule return value         \\
+       §  ..§       & range operator        & §except§      & exception handler         \\
+       §  ~§        & not operator          & §finally§     & exception handler         \\
+       §  !§        & ignore operator       & §memberdecl§  & member declaration        \\
+       §  .§        & wildcard              & §memberdef§   & member definition         \\
+       §  =§        & assignment operator   & §local§       & local rule variables      \\
+       §  :§        & label, start rule     &               &                           \\
+       §  ;§        & end rule              &               &                           \\
+       \hline
+    \end{tabular}
+  \end{center}
+  \caption{DPG symbols}
+\end{table}
@@ -0,0 +1,10 @@
+\chapter{Syntactic elements}
+\minitoc
+\clearpage
+\include{src/lang/lang-syntactic}
+\include{src/lang/lang-atomprod}
+\include{src/lang/lang-simpprod}
+\include{src/lang/lang-prodoper}
+\include{src/lang/lang-sect}
+\include{src/lang/lang-opt}
+\include{src/lang/lang-rest}
@@ -0,0 +1,94 @@
+\section{Error handling}
+
+All syntactic and semantic errors throw exceptions. In particular,
+the methods used to match tokens in the parser base class (match
+etc) throw §EdpgMismatchedToken§. The methods in the lexer base
+class used to match characters (match etc) throw exceptions
+similarly.
+
+\subsection{DPG exception hierarchy}
+
+DPG-generated parsers throw exceptions to signal recognition
+errors or other stream problems. All exceptions derive from
+EdpgException. The hierarchy is as follows:
+
+\begin{verbatim}
+   EdpgException
+      EdpgMismatchedChar
+      EdpgMismatchedToken
+      EdpgSemantic
+\end{verbatim}
+
+\subsection{EdpgException}
+The §EdpgException§ is the base class for all DPG exceptions. It
+defines the following read-only properties:
+\begin{alltt}
+   FileName : string;
+   Line     : integer;
+   Column   : integer;
+\end{alltt}
+These properties contain information about the location where the exception
+occurred.
+
+\subsection{EdpgMismatchedChar}
+The §EdpgMismatchedChar§ exception is thrown by the lexer when it
+is looking for a character, but finds a different one on the input
+stream than expected. It defines the following properties in
+addition to those of §EdpgException§.
+\begin{alltt}
+   FoundChar    : char;
+   FoundString  : string;
+   CharSet      : TdpgCharSet;
+   Str          : string;
+   Inverted     : boolean;
+\end{alltt}
+The §FoundChar§ and §FoundString§ properties contain the character
+or string that was found on the input stream. The §CharSet§ and
+§Str§ properties contain the values which the lexer expected to
+find. The §Inverted§ property is set only if the exception came
+from a §MatchNot(...)§ operation. In this case, the §CharSet§
+property contains the values, that the lexer must §not§ match. The
+validity of pro\-per\-ti\-es are shown in the next table,
+depending on the kind of exception.
+
+\begin{table}[H]
+  \small
+  \begin{center}
+      \begin{tabular}{lcc}
+                        & Mismatched char   & Mismatched string \\
+         \hline
+         FoundChar      &  valid            & -                 \\
+         FoundString    &   -               & valid             \\
+         CharSet        &  valid            & -                 \\
+         Str            &   -               & valid             \\
+         Inverted       &  valid            & -                 \\
+         \hline
+      \end{tabular}
+  \end{center}
+\end{table}
+
+\subsection{EdpgMismatchedToken}
+The §EdpgMismatchedToken§ exception is thrown by the parser when
+it is looking for a token, but finds a different one on the input
+token stream than expected. It defines the following properties in
+addition to those of §EdpgException§.
+\begin{alltt}
+   FoundToken   : IdpgToken;
+   TokenSet     : TdpgByteSet;
+   Inverted     : boolean;
+\end{alltt}
+The §FoundToken§ property contains the token the parser received from the
+lexer. The §TokenSet§ property contains the vaules the parser expected to
+get. The §Inverted§ property is set only if the exception came from a
+§MatchNot(...)§ operation. In this case, the §TokenSet§ property contains the
+values the parser must §not§ get.
+
+\subsection{EdpgSemantic}
+This exception is thrown by a validating semantic predicate. It
+defines the following property in addition to those of
+§EdpgException§.
+\begin{alltt}
+   Assert : string;
+\end{alltt}
+The §Assert§ property contains the validating expression that caused the
+exception.
@@ -0,0 +1,3 @@
+\chapter{Run-time}
+\minitoc \clearpage
+\include{src/rt/rt-err}
@@ -0,0 +1,200 @@
+\chapter{Getting started}
+\minitoc \clearpage
+
+In this chapter, we develop a simple calculator. It accepts integers, the four
+arithmetic operators (§+§,§-§,§/§,§*§), and  parenthesis on its input.
+Spaces, tabs and newline characters are treated as white spaces and used for
+separating tokens. Complete Expressions must be terminated by semicolons.
+
+\section{Lexical analyzer}
+
+Let us define the calculator's lexer.
+
+\begin{verbatim}
+    1 unit myLexer;
+    2
+    3 lexer TmyLexer;
+    4 options
+    5 {
+    6   exportvocab = myLexer;
+    7 }
+\end{verbatim}
+In line §1§ we define the unit name of the generated Pascal source
+file for the lexer. In line §3§ we give a name to the lexer class.
+If there is an §options§ block for a grammar class, it must follow
+the class declaration. Here, we define one option for the lexer:
+§exportVocab§. This option tells the DPG that all the token
+definitions must be exported to §myLexerTokens.txt§ and
+§myLexerTokens.pas§. Grammars can import the generated token names
+using the exported §.txt§ files.
+
+\paragraph{Note:} it is not necessary to define the §exportVocab§ option for a
+grammar. The file names for the token exchange files are automatically created
+using the specified unit name.
+
+Now we define the lexer tokens.
+
+\begin{verbatim}
+    8 LPAREN: '(';
+    9 RPAREN: ')';
+   10 PLUS:   '+';
+   11 MINUS:  '-';
+   12 STAR:   '*';
+   13 SLASH:  '/';
+   14 SEMI:   ';';
+\end{verbatim}
+In lines from §8§ to §14§, there are simple token definitions. Each of them
+recognizes one character from the input stream.
+
+\begin{verbatim}
+   15 INT: ('0'..'9')+ ;
+\end{verbatim}
+In line §15§, we define a rule to recognize integer numbers. This tells us that
+the INT consists of one or more numeric characters.
+
+Now, define a rule to handle white space characters.
+\begin{verbatim}
+   16 WS
+   17    : '\r' '\n'    { _ttype := TT_SKIP; }
+   18    | '\t'         { _ttype := TT_SKIP; }
+   19    | ' '          { _ttype := TT_SKIP; }
+   20    ;
+\end{verbatim}
+Characters surrounded by curly braces are actions. The content of
+an action block will be copied verbatim into the generated Pascal
+source file. In this example the expression §_ttype := TT_SKIP;§
+forbids the §WS§ rule to generate a token, because we don't need
+it.
+
+Now the lexer definition is finished. This simple lexer recognizes relevant
+characters, integers and skips every white spaces on its input.
+
+\section{Parser}
+
+Now we define the parser.
+
+\begin{verbatim}
+    1 unit myParser;
+    2
+    3 parser TmyParser;
+    4 options
+    5 {
+    6    importVocab = myLexer;
+    7 }
+\end{verbatim}
+This part is analogous to lexer definition with one exception. In
+line §6§, we import the tokens from a file specified by the
+§exportVocab§ option in the lexer grammar. Now, the parser knows
+which tokens are to be expected from the lexer.
+
+\begin{verbatim}
+    8 memberdecl
+    9 {
+   10    value: integer;
+   11 }
+\end{verbatim}
+In lines from §8§ to §11§, we specify the §memberdecl§ section. This section is
+used to define members for the generated parser class. In this example, the §TmyParser§
+class will have a member called §value§. We use this member to store the result
+of the calculation for the current expression.
+
+Now we define the rules.
+\begin{verbatim}
+   12 calc
+   13    : (expression SEMI { writeln( value); } )*
+   14    ;
+   15
+   16 expression
+   17 local
+   18 {
+   19   temp : integer;
+   20 }
+   21   :  term             { temp  := value;           }
+   22      (
+   23         PLUS  term    { temp  := temp + value;    }
+   24      |  MINUS term    { temp  := temp - value;    }
+   25      )*               { value := temp;            }
+   26   ;
+\end{verbatim}
+In lines §17..20§, we define a local variable for the rule
+§expression§. The following rules are defined in a similar way to
+the rule §expression§.
+
+\begin{verbatim}
+   27 term
+   28 local
+   29 {
+   30   temp : integer;
+   31 }
+   32   :  factor           { temp  := value;           }
+   33      (
+   34         STAR  factor  { temp  := temp *   value;  }
+   35      |  SLASH factor  { temp  := temp div value;  }
+   36      )*               { value := temp;            }
+   37   ;
+   38
+   39 factor
+   40 local
+   41 {
+   42   temp : integer;
+   43 }
+   44   :  uInt
+   45   |  LPAREN expression RPAREN
+   46   ;
+   47
+   48 uInt
+   49   : x:INT { value := StrToInt( x.TokenText); }
+   50   ;
+\end{verbatim}
+In line §49§, we specified that the rule must have a variable
+called 'x' which will contain the INT token. For the moment, it is
+enough to know that it has a property §TokenText§ which contains
+the text of the recognized token. This property is a string
+property, so we have to convert it to an integer, and store it in
+the §value§ member variable.
+
+\section{The project}
+The following simple project demonstrates how the defined lexer
+and parser classes are used.
+
+\begin{verbatim}
+    1 program calc;
+    2 {$APPTYPE CONSOLE}
+    3 uses
+    4    Classes,
+    5    SysUtils,
+    6    myLexer  in 'myLexer.pas',
+    7    myParser in 'myParser.pas';
+    8
+    9 var
+   10    stm:  TFileStream;
+   11    lex:  TmyLexer;
+   12    par:  TmyParser;
+   13
+   14 begin
+   15    if ParamCount <> 1 then
+   16    begin
+   17       writeln('usage: calc <filename>');
+   18       exit;
+   19    end
+   20    else
+   21    begin
+   22       try
+   23          stm := TFileStream.Create( ParamStr(1),
+   24                                     fmOpenRead);
+   24          lex := TmyLexer.Create(stm);
+   25          par := TmyParser.Create(lex);
+   26
+   27          par.calc;
+   28       except
+   29          on EdpgMismatchedToken do
+   22             writeln('Syntax error');
+   30          on EdpgMismatchedChar do
+   33             writeln('Syntax rrror');
+   29       end;
+   30    end;
+   31
+   32    stm.Free;
+   33    par.Free;
+   34 end.
+\end{verbatim}
@@ -0,0 +1,281 @@
+\chapter{Tokens}
+\minitoc \clearpage
+\section{Overview}
+Tokens are the basic building blocks of any parser or compiler.
+The task of a lexer (lexical analyzer, scanner) is to break up the
+input character stream into a stream of tokens. Then §nextToken§
+method of a lexer passes the next token to the parser, or throws
+an exception if the next character on the input stream cannot be
+matched by any of the public lexer rules. The §nextToken§ method
+is always synthesized from the public lexer rules.
+
+§Tokens§ in DPG are interface pointers. The interface type is §IdpgToken§,
+which defines the following properties:
+\begin{verbatim}
+   IdpgToken = interface
+      ...
+      property  TokenText   : string;
+      property  TokenType   : integer;
+      property  TokenLine   : integer;
+      property  TokenColumn : integer;
+      ...
+   end;
+\end{verbatim}
+
+where §TokenText§ is the text matched by the lexer; §TokenType§ is
+the type of token assigned to the token by DPG; §TokenLine§ is the
+line number where the token starts in the input stream;
+§TokenColumn§ is the column number.
+
+Within parser rules, the input token can be accessed via this interface. To
+obtain the interface to the recognized token, the reference to the token must
+be prefixed by a label. For example,
+\begin{verbatim}
+   ...
+   x:NUMBER
+   {
+      ...
+      LogMsg( 'Token: ' +          x.TokenText );
+      LogMsg( 'Type:  ' + IntToStr(x.TokenType));
+      ...
+   }
+   ...
+\end{verbatim}
+
+Note: Variables for labels are always generated by DPG, so you should not define
+them in the §local{...}§ section of the rule.
+
+\section{Defining tokens}
+In DPG, tokens can be defined in the lexer grammars. DPG always
+generates a token exchange file that describes all the token types
+matched by the lexer. This file can be imported in a parser
+grammar, so the lexer and parser have the same token types. Tokens
+can be defined either,
+\begin{itemize}
+   \item[-] via lexer rules, or
+   \item[-] in the lexer's §tokens{...}§ section
+\end{itemize}
+
+\subsubsection{Defining a token using a lexer rule}
+The commonest method of defining a token is using a lexer rule. In
+lexer grammars, every rule is associated with a §TokenType§ which
+is determined by DPG at compile time. This value is assigned to
+the result token by default, but it can be modified in the given
+rule if needed. This is used mostly in rules that need runtime
+information to set the type of the result token, but is otherwise
+uncommon.
+
+There is one exception: when a rule must not generate a token at all.
+This is useful for defining comments or white-spaces for a grammar.
+Every lexer rule has a local variable called §_ttype§. If
+§_ttype§ has a value of §TT_SKIP§, then the rule won't generate any token. For
+example,
+\begin{verbatim}
+   SLCOMMENT : "//" ( ~'\n')* '\n' { _ttype := TT_SKIP; } ;
+\end{verbatim}
+
+The following examples are normal lexer rules, and they are typical in lexers:
+\begin{verbatim}
+   LPAREN:      '(';
+   RPAREN:      ')';
+   DIGIT:       '0'..'9';
+   NUMBER:      DIGIT (DIGIT)*;
+   LETTER:      'a'..'Z' | 'A'..'Z';
+   ID:          LETTER (LETTER | DIGIT | '_')*;
+\end{verbatim}
+
+\subsubsection{Defining a token in the tokens\{...\} section}
+
+Lexer grammars may have a §tokens{...}§ section in the class
+declaration. Within this section you can define ``imaginary''
+tokens and string literals. These tokens are not ``real'' tokens
+and cannot be referenced in lexer rules. ``Imaginary'' tokens are
+helpful when a rule can recognize more than one type of token and
+defining rules for these tokens would be ambiguous. For example,
+
+\begin{verbatim}
+tokens
+{
+   STRING;
+   CHAR;
+}
+// ========================================================
+// String or char
+// ========================================================
+STRING_OR_CHAR
+:  '\'' (~'\'' | '\'' '\'')* '\''
+   {
+            if TokenText = ''''''       then  _ttype := TT_STRING
+      else  if TokenText = ''''''''''   then  _ttype := TT_CHAR
+      else  if Length( TokenText) > 3   then  _ttype := TT_STRING
+      else                                    _ttype := TT_CHAR;
+   }
+;
+\end{verbatim}
+The rule §STRING_OR_CHAR§ recognizes a pascal character literal,
+and a pascal string literal. The code in the action block decides
+which type of token must be created by the rule. Note: These
+tokens are ``imaginary'' tokens. Referencing them in lexer
+grammars is not possible, because they have no implementation.
+Within parser rules, the tokens §STRING§ and §CHAR§ can be
+referenced. But §STRING_OR_CHAR§ can't be referenced, because this
+rule creates a §STRING§ or a §CHAR§ token.
+
+\paragraph{String literals} in the §tokens{...}§ section are useful when the language
+defines keywords. In this case you can list your language's keywords in this
+section. They will be put into the lexer's literals table. The lexer will consult
+this table in the following cases:
+\begin{itemize}
+   \item[-] if the §testLiterals§ option for the lexer class is true, the lexer checks the
+   literals table after each recognized token,
+   \item[-] if the §testLiterals§ option for the lexer class is false, the
+   check will be executed in rules, that have this option set.
+\end{itemize}
+
+If neither lexer rules nor lexer class have this option set, the
+lexer's literals table can be explicitly checked via the
+§TestLiterals§ method. The advantage of using string literals is
+that you can reference them in the parser as they are defined in
+the §tokens{...}§ section. For example,
+
+\begin{verbatim}
+   ...
+   lexer TmyLexer;
+   options
+   {
+      testLiterals = true;
+   }
+   tokens
+   {
+      ...
+      "function";
+      "procedure";
+      ...
+   }
+   ...
+
+   parser TmyParser;
+   rule1 : "function"  ID SEMI;
+   rule2 : "procedure" ID LPAREN args RPAREN SEMI;
+   ...
+\end{verbatim}
+In the above example we set the §testLiterals§ option to true for the lexer
+class. This is not recommended, because the lexer will check the literals table
+even if it found a non-string token. Instead, you have to check the table in a
+rule that can recognize these literals. For example:
+
+\begin{verbatim}
+   ...
+   lexer TmyLexer;
+   ...
+
+   ID
+   options
+   {
+      testLiterals=true;
+   }
+      : 'a'..'z' | 'A'..'Z' ('a'..'z' | 'A'..'Z' | '0'..'9')*
+      ;
+\end{verbatim}
+Here the literals table will only be consulted in the rule §ID§.
+This will improve the lexer's speed. Of course you can set the
+§testLiterals§ options to true for as many rules as you want. All
+of them will check the literals table.
+
+\paragraph{Note:} The §testLiterals§ option has no effect for lexer rules.
+
+
+\section{User defined token classes}
+
+By default, DPG uses the §TdpgToken§ class to represent tokens.
+This class is derived from §TInterfacedObject§, and implements the
+§IdpgToken§ interface. This interface is used across the generated
+code. To define a new token class you must derive your new class
+from §TdpgToken§, implement your interface to access and
+manipulate your object, and finally tell the lexer that it must
+create your type of token object instead of the default
+§TdpgToken§. After that, within the rules you must obtain the
+interface of your class and use it. Let us have a more detailed
+look at this:
+
+1. Create a token class:
+\begin{verbatim}
+   ImyToken = interface( IdpgToken)
+     [a guid definition]
+
+     function   Get_MyString : string;
+     procedure  Set_MyString( AString: string);
+
+     property   MyString : string   read    Get_MyString
+                                    write   Set_MyString;
+   end;
+
+   TmyToken = class( TdpgToken,
+                     IdpgToken,
+                     ImyToken)
+   protected
+      fMyString : string;
+
+      function   Get_MyString : string;
+      procedure  Set_MyString( AString: string);
+
+   public
+      constructor Create(  pType: integer;
+                           pText: string); override;
+
+   end;
+
+   constructor TmyToken.Create( pType: integer;
+                                pText: string);
+   begin
+      inherited;
+      ...
+      your code here
+      ...
+   end;
+
+   function TmyToken.Get_MyString: string;
+   begin
+      result := fMyString;
+   end;
+
+   function TmyToken.Set_MyString( pString: string);
+   begin
+      fMyString := pString;
+   end;
+\end{verbatim}
+
+2. Tell to lexer that it must use our token class.
+\begin{verbatim}
+   uses myToken;
+   ...
+   myLexer.TokenClass := TmyToken;
+\end{verbatim}
+
+3. Use it in a rule.
+\begin{verbatim}
+   ...
+   parser TmyParser;
+
+   rule1
+      :
+         "procedure" x:id (LPAREN params RPAREN)?
+         {
+            (x as ImyToken).MyString := 'procid';
+         }
+      ;
+
+\end{verbatim}
+
+\paragraph{Note:} You must cast the returned interface to your token interface,
+because the §makeToken§ method of the lexer always returns an §IdpgToken§
+interface and the labels specified to obtain a reference to a token are always
+§IdpgToken§ references.
+
+\paragraph{Note:} If you have to do special actions to initialize your token
+class, you must have the same constructor as defined in the
+example. The §makeToken§ method of the lexer always creates tokens
+with this constructor. If you have another kind of constructor for
+your token class, it won't be used by the lexer.
+
+\clearpage