diff --git a/doc/grammars/calc/calc.dpr b/doc/grammars/calc/calc.dpr
new file mode 100644
index 0000000..0c90e80
--- /dev/null
+++ b/doc/grammars/calc/calc.dpr
@@ -0,0 +1,54 @@
+// ============================================================================
+// Demo driver for the four operator calculator: parses the file named on the
+// command line; the parser writes the value of each ';'-terminated expression.
+// ============================================================================
+program calc;
+
+{$APPTYPE CONSOLE}
+
+uses
+  Classes,
+  SysUtils,
+  calcLexer in 'calcLexer.pas',
+  calcParser in 'calcParser.pas';
+
+var
+  stm: TFileStream;
+  lex: TcalcLexer;
+  par: TcalcParser;
+
+begin
+  if ParamCount <> 1 then
+  begin
+    // NOTE(review): the argument placeholder was missing from the message;
+    // '<filename>' is a reconstruction.
+    writeln('usage: calc <filename>');
+    exit;
+  end;
+
+  // Start from nil so the cleanup below is safe wherever construction fails.
+  stm := nil;
+  par := nil;
+
+  try
+    try
+      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
+      lex := TcalcLexer.Create(stm);
+      par := TcalcParser.Create(lex);
+
+      par.calc;
+    except
+      // Report the failure instead of swallowing it silently.
+      on E: Exception do
+      begin
+        writeln('Error: ', E.Message);
+        ExitCode := 1;
+      end;
+    end;
+  finally
+    // Release in reverse order of creation. As in the original program the
+    // lexer is not freed here (NOTE(review): confirm who owns it).
+    par.Free;
+    stm.Free;
+  end;
+end.
diff --git a/doc/grammars/calc/calc.txt b/doc/grammars/calc/calc.txt
new file mode 100644
index 0000000..fb388c7
--- /dev/null
+++ b/doc/grammars/calc/calc.txt
@@ -0,0 +1,5 @@
+1+2+3+4+5+6+7+8+9;
+(((((2+3)))));
+(-1*(-2*(-3*(-4+ -5))));
+(-1*(-2*(-3*(-4+ 5))));
+7 * -(-9);
\ No newline at end of file
diff --git a/doc/grammars/calc/calcLexer.g b/doc/grammars/calc/calcLexer.g
new file mode 100644
index 0000000..d7cd232
--- /dev/null
+++ b/doc/grammars/calc/calcLexer.g
@@ -0,0 +1,46 @@
+// ============================================================================
+// Demo lexer for a four operator calculator
+// ============================================================================
+unit calcLexer;
+
+lexer TcalcLexer;
+options
+{
+  exportVocab = calcLexer;
+  k = 2;
+}
+
+// ============================================================================
+// Simple tokens: parentheses, the four operators and the terminator
+// ============================================================================
+LPAREN : '(';
+RPAREN : ')';
+
+PLUS   : '+';
+MINUS  : '-';
+STAR   : '*';
+SLASH  : '/';
+
+SEMI   : ';';
+
+// ============================================================================
+// INT - one or more decimal digits (unsigned)
+// ============================================================================
+INT    : ('0'..'9')+;
+
+// 
============================================================================
+// White space - consumed and skipped; line and tab positions are tracked
+// ============================================================================
+WS
+  :
+    (
+      '\r' '\n' { newLine; }
+    | '\r'      { newLine; }
+    | '\n'      { newLine; }
+    | '\t'      { tab; }
+    | ' '
+    )
+    {
+      _ttype := TT_SKIP;
+    }
+  ;
diff --git a/doc/grammars/calc/calcParser.g b/doc/grammars/calc/calcParser.g
new file mode 100644
index 0000000..cdc7cbf
--- /dev/null
+++ b/doc/grammars/calc/calcParser.g
@@ -0,0 +1,92 @@
+// ============================================================================
+// Demo parser for four operator calculator
+// ============================================================================
+unit calcParser;
+
+parser TcalcParser;
+options
+{
+  importVocab = calcLexer;
+  exportVocab = calcParser;
+}
+memberdecl
+{
+  value : integer;
+}
+
+// ============================================================================
+// calc - one or more "expression ;", writing "value" after each one
+// ============================================================================
+calc
+  : (expression SEMI {writeln(value);} )+
+  ;
+
+
+// ============================================================================
+// expression
+// ============================================================================
+expression
+  : simpleExpression
+  ;
+
+// ============================================================================
+// simpleExpression - term ((PLUS | MINUS) term)*, result left in "value"
+// ============================================================================
+simpleExpression
+local
+{
+  temp: integer;
+}
+  : term { temp := value; }
+    (
+      PLUS term  { temp := temp + value; }
+    | MINUS term { temp := temp - value; }
+    )* { value := temp; }
+  ;
+
+// ============================================================================
+// term - factor ((STAR | SLASH) factor)*, SLASH is integer "div"
+// ============================================================================
+term
+local
+{
+  temp: integer;
+}
+  : factor { temp := value; }
+    (
+      STAR factor  { temp := temp * value; }
+    | SLASH factor { temp := temp div value; }
+    )* { value := temp; }
+  ;
+
+// ============================================================================
+// factor - optional sign followed by an integer or a parenthesised expression
+// ============================================================================
+factor
+local
+{
+  s: integer;
+}
+{
+  s := 1;
+}
+  :
+    (
+      PLUS  { s := 1; }
+    | MINUS { s := -1; }
+    )?
+    (
+      uInt
+    | LPAREN expression RPAREN
+    )
+    {
+      value := s * value;
+    }
+  ;
+
+// ============================================================================
+// uInt - converts the INT token text to "value"
+// ============================================================================
+uInt
+  : x:INT { value := StrToInt( x.TokenText); }
+  ;
diff --git a/doc/grammars/calc/readme.txt b/doc/grammars/calc/readme.txt
new file mode 100644
index 0000000..15c7183
--- /dev/null
+++ b/doc/grammars/calc/readme.txt
@@ -0,0 +1,10 @@
+To build the demo project you must first compile the grammars.
+
+1. Open calc.dpp in DPG
+2. Press F9 to compile the grammars
+
+After compilation the project can be opened in Delphi. Make sure that the DPG
+runtime library is on the Delphi library path (in the project settings
+or in the environment settings).
+
+Have fun...
\ No newline at end of file
diff --git a/doc/grammars/filter/filter.g b/doc/grammars/filter/filter.g
new file mode 100644
index 0000000..fcddd47
--- /dev/null
+++ b/doc/grammars/filter/filter.g
@@ -0,0 +1,43 @@
+unit filter;
+
+lexer Tfilter;
+options
+{
+  k = 2;
+  filter = true;
+}
+
+// ----------------------------------------------------------------------------
+// Paragraph
+// ----------------------------------------------------------------------------
+P
+  : "<p>"
+  ;
+
+// ----------------------------------------------------------------------------
+// Break
+// ----------------------------------------------------------------------------
+BR
+  : "<br>"
+  ;
+
+// ----------------------------------------------------------------------------
+// Newline
+// ----------------------------------------------------------------------------
+NEWLINE
+  :
+    (
+      '\r' '\n' { newLine; _ttype := TT_SKIP; }
+    | '\r'      { newLine; _ttype := TT_SKIP; }
+    | '\n'      { newLine; _ttype := TT_SKIP; }
+    )
+  ;
+
+// ----------------------------------------------------------------------------
+// Tab
+// ----------------------------------------------------------------------------
+TAB
+  : '\t' { tab; _ttype := TT_SKIP; }
+  ;
+
+
diff --git a/doc/grammars/filter/filter.pas b/doc/grammars/filter/filter.pas
new file mode 100644
index 0000000..94c5345
--- /dev/null
+++ b/doc/grammars/filter/filter.pas
@@ -0,0 +1,234 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.78r
+// Grammar: filter.g
+// ============================================================================
+unit filter;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTypes,
+  filterTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class Tfilter declaration
+  // =========================================================================
+  Tfilter = class( TdpgLexer)
+
+  protected // Public grammar rules ("rescoped")
+    procedure mP       ( pCreate: boolean);
+    procedure mBR      ( pCreate: boolean);
+    procedure mNEWLINE ( pCreate: boolean);
+    procedure mTAB     ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mP - matches the literal '<p>'; builds a TT_P token when pCreate is true
+// ============================================================================
+procedure Tfilter.mP( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_P;
+
+  match('<p>');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mBR - matches the literal '<br>'; builds a TT_BR token when pCreate is true
+// ============================================================================
+procedure Tfilter.mBR( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_BR;
+
+  match('<br>');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mNEWLINE - matches CR LF, CR or LF, calls newLine and skips the token
+// ============================================================================
+procedure Tfilter.mNEWLINE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_NEWLINE;
+
+  if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+  begin
+    match(#13);
+    match(#10);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else if (( LA(1) in [#13])) then
+  begin
+    match(#13);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else if (( LA(1) in [#10])) then
+  begin
+    match(#10);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mTAB - matches a tab character, calls tab and skips the token
+// ============================================================================
+procedure Tfilter.mTAB( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_TAB;
+
+  match(#9);
+  tab; _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken
+// ----------------------------------------------------------------------------
+function Tfilter.NextToken : IdpgToken;
+begin
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      if (( LA(1) in ['<']) and (LA(2) in ['p'])) then
+      begin
+        mP(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['<']) and (LA(2) in ['b'])) then
+      begin
+        mBR(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#10,#13])) then
+      begin
+        mNEWLINE(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#9])) then
+      begin
+        mTAB(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+        begin
+          consume;   // no rule starts here: drop one character and rescan
+          continue;
+        end;
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      consume;   // a rule failed part-way: drop one character and rescan
+      continue;
+    end;
+  end;
+end;
+
+end.
diff --git a/doc/grammars/filter/filterTokens.pas b/doc/grammars/filter/filterTokens.pas
new file mode 100644
index 0000000..b20bca4
--- /dev/null
+++ b/doc/grammars/filter/filterTokens.pas
@@ -0,0 +1,19 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.78r
+// Grammar: filter.g
+// ============================================================================
+unit filterTokens;
+
+interface
+
+const
+  TT_EOF     = 1;
+  TT_P       = 4;
+  TT_BR      = 5;
+  TT_NEWLINE = 6;
+  TT_TAB     = 7;
+
+implementation
+end.
diff --git a/doc/grammars/filter/filterTokens.txt b/doc/grammars/filter/filterTokens.txt
new file mode 100644
index 0000000..a93fffe
--- /dev/null
+++ b/doc/grammars/filter/filterTokens.txt
@@ -0,0 +1,7 @@
+// $Delphi Parser Generator: filter.pas -> TfilterTokens.txt$
+Tfilter
+TT_EOF=1
+TT_P=4
+TT_BR=5
+TT_NEWLINE=6
+TT_TAB=7
diff --git a/doc/grammars/multiLexer/JavaDocLexer.pas b/doc/grammars/multiLexer/JavaDocLexer.pas
new file mode 100644
index 0000000..2652451
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaDocLexer.pas
@@ -0,0 +1,367 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javadoclexer.g
+// ============================================================================
+unit JavaDocLexer;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTokenStreamSelector,
+  dpgTypes,
+  JavaDocTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class TJavaDocLexer declaration
+  // =========================================================================
+  TJavaDocLexer = class( TdpgLexer)
+
+  public
+    Selector : IdpgTokenStreamSelector;   // set by the host program
+
+  protected // Protected grammar rules
+    procedure mID            ( pCreate: boolean);
+
+  protected // Public grammar rules ("rescoped")
+    procedure mPARAM         ( pCreate: boolean);
+    procedure mEXCEPTION     ( pCreate: boolean);
+    procedure mSTAR          ( pCreate: boolean);
+    procedure mJAVADOC_CLOSE ( pCreate: boolean);
+    procedure mNEWLINE       ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mPARAM
+// 
============================================================================
+procedure TJavaDocLexer.mPARAM( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_3: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_PARAM;
+
+  match('@param');
+  _cnt_3 := 0;   // number of blanks matched so far; at least one is required
+
+  while(true) do
+  begin
+    if (( LA(1) in [' '])) then
+    begin
+      match(' ');
+    end
+
+    else
+    begin
+      if _cnt_3 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);
+    end;
+
+    INC(_cnt_3);
+  end;
+  mID(false);   // the identifier becomes part of this token's text
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mID - one or more characters in 'a'..'z'
+// ============================================================================
+procedure TJavaDocLexer.mID( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_9: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_ID;
+
+  _cnt_9 := 0;   // number of letters matched so far; at least one is required
+
+  while(true) do
+  begin
+    if (( LA(1) in ['a'..'z'])) then
+    begin
+      match( ['a'..'z']);
+    end
+
+    else
+    begin
+      if _cnt_9 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
+    end;
+
+    INC(_cnt_9);
+  end;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mEXCEPTION - '@exception', one or more blanks, then an identifier
+// ============================================================================
+procedure TJavaDocLexer.mEXCEPTION( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_6: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_EXCEPTION;
+
+  match('@exception');
+  _cnt_6 := 0;   // number of blanks matched so far; at least one is required
+
+  while(true) do
+  begin
+    if (( LA(1) in [' '])) then
+    begin
+      match(' ');
+    end
+
+    else
+    begin
+      if _cnt_6 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);
+    end;
+
+    INC(_cnt_6);
+  end;
+  mID(false);   // the identifier becomes part of this token's text
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mSTAR - matches a single '*' and skips it
+// ============================================================================
+procedure TJavaDocLexer.mSTAR( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_STAR;
+
+  match('*');
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mJAVADOC_CLOSE - matches '*/' and pops the selector
+// ============================================================================
+procedure TJavaDocLexer.mJAVADOC_CLOSE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_JAVADOC_CLOSE;
+
+  match('*/');
+  Selector.Pop;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mNEWLINE
+// 
============================================================================
+procedure TJavaDocLexer.mNEWLINE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_NEWLINE;
+
+  if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+  begin
+    match(#13);
+    match(#10);
+  end
+
+  else if (( LA(1) in [#13])) then
+  begin
+    match(#13);
+  end
+
+  else if (( LA(1) in [#10])) then
+  begin
+    match(#10);
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+  newLine;
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken - returns the next non-skipped token, or a TT_EOF token
+// ----------------------------------------------------------------------------
+function TJavaDocLexer.NextToken : IdpgToken;
+var
+  la1 : char;
+  la2 : char;
+begin
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      la1 := LA(1);   // NOTE(review): la1/la2 are never read afterwards -
+      la2 := LA(2);   // looks like a debugging aid; confirm before removing
+
+      if (( LA(1) in ['@']) and (LA(2) in ['p'])) then
+      begin
+        mPARAM(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['@']) and (LA(2) in ['e'])) then
+      begin
+        mEXCEPTION(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['*']) and (LA(2) in ['/'])) then
+      begin
+        mJAVADOC_CLOSE(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['*'])) then
+      begin
+        mSTAR(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#10,#13])) then
+      begin
+        mNEWLINE(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+        begin
+          consume;   // no rule starts here: drop one character and rescan
+          continue;
+        end;
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      consume;   // a rule failed part-way: drop one character and rescan
+      continue;
+    end;
+  end;
+end;
+
+end.
diff --git a/doc/grammars/multiLexer/JavaDocTokens.pas b/doc/grammars/multiLexer/JavaDocTokens.pas
new file mode 100644
index 0000000..8aa1885
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaDocTokens.pas
@@ -0,0 +1,21 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javadoclexer.g
+// ============================================================================
+unit JavaDocTokens;
+
+interface
+
+const
+  TT_EOF           = 1;
+  TT_PARAM         = 4;
+  TT_EXCEPTION     = 5;
+  TT_ID            = 6;
+  TT_STAR          = 7;
+  TT_JAVADOC_CLOSE = 8;
+  TT_NEWLINE       = 9;
+
+implementation
+end.
diff --git a/doc/grammars/multiLexer/JavaDocTokens.txt b/doc/grammars/multiLexer/JavaDocTokens.txt
new file mode 100644
index 0000000..10a1496
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaDocTokens.txt
@@ -0,0 +1,9 @@
+// $Delphi Parser Generator: JavaDocLexer.pas -> TJavaDocLexerTokens.txt$
+TJavaDocLexer
+TT_EOF=1
+TT_PARAM=4
+TT_EXCEPTION=5
+TT_ID=6
+TT_STAR=7
+TT_JAVADOC_CLOSE=8
+TT_NEWLINE=9
diff --git a/doc/grammars/multiLexer/JavaLexer.pas b/doc/grammars/multiLexer/JavaLexer.pas
new file mode 100644
index 0000000..206dcd5
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaLexer.pas
@@ -0,0 +1,286 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javalexer.g
+// ============================================================================
+unit JavaLexer;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTokenStreamSelector,
+  dpgTypes,
+  JavaTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class TJavaLexer declaration
+  // =========================================================================
+  TJavaLexer = class( TdpgLexer)
+
+  public
+    Selector : IdpgTokenStreamSelector;   // set by the host program
+
+  protected // Internals
+    procedure initialize; override;
+
+  protected // Public grammar rules ("rescoped")
+    procedure mSEMI         ( pCreate: boolean);
+    procedure mJAVADOC_OPEN ( pCreate: boolean);
+    procedure mID           ( pCreate: boolean);
+    procedure mWS           ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mSEMI - matches ';'
+// ============================================================================
+procedure TJavaLexer.mSEMI( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_SEMI;
+
+  match(';');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mJAVADOC_OPEN - matches '/**' and pushes the 'docLexer' stream
+// ============================================================================
+procedure TJavaLexer.mJAVADOC_OPEN( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_JAVADOC_OPEN;
+
+  match('/**');
+  Selector.Push('docLexer');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mID - one or more characters in 'a'..'z', then a literal-table check
+// ============================================================================
+procedure TJavaLexer.mID( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_5: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_ID;
+
+  _cnt_5 := 0;   // number of letters matched so far; at least one is required
+
+  while(true) do
+  begin
+    if (( LA(1) in ['a'..'z'])) then
+    begin
+      match( ['a'..'z']);
+    end
+
+    else
+    begin
+      if _cnt_5 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
+    end;
+
+    INC(_cnt_5);
+  end;
+  _ttype := testLit( _ttype);
+
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mWS - matches one blank, tab or line break and skips it
+// ============================================================================
+procedure TJavaLexer.mWS( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_WS;
+
+  if (( LA(1) in [' '])) then
+  begin
+    match(' ');
+  end
+
+  else if (( LA(1) in [#9])) then
+  begin
+    match(#9);
+  end
+
+  else if (( LA(1) in [#10,#13])) then
+  begin
+    if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+    begin
+      match(#13);
+      match(#10);
+    end
+
+    else if (( LA(1) in [#13])) then
+    begin
+      match(#13);
+    end
+
+    else if (( LA(1) in [#10])) then
+    begin
+      match(#10);
+    end
+
+    else
+      Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+    newLine;
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#9..#10,#13,' '], FileName, Line, Column);
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken - returns the next non-skipped token, or a TT_EOF token
+// ----------------------------------------------------------------------------
+function TJavaLexer.NextToken : IdpgToken;
+var
+  _first : TdpgCharSet;
+
+begin
+  _first := [#9..#10,#13,' ','/',';','a'..'z'];
+
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      if (( LA(1) in [';'])) then
+      begin
+        mSEMI(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['/'])) then
+      begin
+        mJAVADOC_OPEN(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['a'..'z'])) then
+      begin
+        mID(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#9..#10,#13,' '])) then
+      begin
+        mWS(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+          Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column);
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      Raise;   // not a filter lexer: errors are passed on to the caller
+    end;
+  end;
+end;
+
+// ----------------------------------------------------------------------------
+// initialize (InitLiterals) - registers the literals checked by testLit
+// ----------------------------------------------------------------------------
+procedure TJavaLexer.initialize;
+begin
+  fLiterals.Add('int', 10);   // 10 = LT_int in JavaTokens
+end;
+
+end.
diff --git a/doc/grammars/multiLexer/JavaTokens.pas b/doc/grammars/multiLexer/JavaTokens.pas
new file mode 100644
index 0000000..e4cef1e
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaTokens.pas
@@ -0,0 +1,25 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javalexer.g
+// ============================================================================
+unit JavaTokens;
+
+interface
+
+const
+  TT_EOF           = 1;
+  TT_PARAM         = 4;
+  TT_EXCEPTION     = 5;
+  TT_ID            = 6;
+  TT_STAR          = 7;
+  TT_JAVADOC_CLOSE = 8;
+  TT_NEWLINE       = 9;
+  LT_int           = 10;
+  TT_SEMI          = 11;
+  TT_JAVADOC_OPEN  = 12;
+  TT_WS            = 13;
+
+implementation
+end.
diff --git a/doc/grammars/multiLexer/JavaTokens.txt b/doc/grammars/multiLexer/JavaTokens.txt
new file mode 100644
index 0000000..1965254
--- /dev/null
+++ b/doc/grammars/multiLexer/JavaTokens.txt
@@ -0,0 +1,13 @@
+// $Delphi Parser Generator: JavaLexer.pas -> TJavaLexerTokens.txt$
+TJavaLexer
+TT_EOF=1
+TT_PARAM=4
+TT_EXCEPTION=5
+TT_ID=6
+TT_STAR=7
+TT_JAVADOC_CLOSE=8
+TT_NEWLINE=9
+LT_int="int"=10
+TT_SEMI=11
+TT_JAVADOC_OPEN=12
+TT_WS=13
diff --git a/doc/grammars/multiLexer/demo.dpr b/doc/grammars/multiLexer/demo.dpr
new file mode 100644
index 0000000..ac84a45
--- /dev/null
+++ b/doc/grammars/multiLexer/demo.dpr
@@ -0,0 +1,66 @@
+// ============================================================================
+// Demo driver for the multi-lexer grammars: a Java lexer and a JavaDoc lexer
+// read the same input and are switched through a token stream selector.
+// ============================================================================
+program demo;
+
+{$APPTYPE CONSOLE}
+
+uses
+  Classes,
+  SysUtils,
+  dpgTokenStreamSelector,
+  javaLexer,
+  javaDocLexer,
+  javaParser;
+
+var
+  stm        : TFileStream;
+  lexJava    : TJavaLexer;
+  lexJavaDoc : TJavaDocLexer;
+  parJava    : TJavaParser;
+  sel        : TdpgTokenStreamSelector;
+
+begin
+  if ParamCount <> 1 then
+  begin
+    writeln('usage: demo <filename>');
+    exit;
+  end;
+
+  stm := nil;
+
+  try
+    try
+      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
+      sel := TdpgTokenStreamSelector.Create;
+
+      lexJava    := TJavaLexer.Create( stm);
+      // The JavaDoc lexer is built on the Java lexer's input state.
+      lexJavaDoc := TJavaDocLexer.Create( lexJava.InputState);
+
+      lexJava.Selector    := sel;
+      lexJavaDoc.Selector := sel;
+
+      // 'docLexer' is the name the Java lexer pushes on '/**'.
+      sel.add( lexJava, 'main');
+      sel.add( lexJavaDoc, 'docLexer');
+      sel.select( 'main');
+
+      parJava := TJavaParser.Create( sel);
+
+      parJava.input;
+    except
+      // Say what went wrong instead of a bare 'Exception...'.
+      on E: Exception do
+      begin
+        writeln('Exception: ', E.Message);
+        ExitCode := 1;
+      end;
+    end;
+  finally
+    // Only the stream is released; the lexers, selector and parser are left
+    // alone as in the original (NOTE(review): confirm their ownership).
+    stm.Free;
+  end;
+end.
diff --git a/doc/grammars/multiLexer/javaDocLexer.g b/doc/grammars/multiLexer/javaDocLexer.g
new file mode 100644
index 0000000..1e5ddb7
--- /dev/null
+++ b/doc/grammars/multiLexer/javaDocLexer.g
@@ -0,0 +1,76 @@
+unit JavaDocLexer;
+
+uses
+{
+  dpgTokenStreamSelector;
+}
+
+lexer TJavaDocLexer;
+options
+{
+  k = 2;
+  exportVocab = JavaDoc;
+  filter = true;
+}
+
+memberdecl
+{
+  public
+    Selector : IdpgTokenStreamSelector;
+}
+
+// ----------------------------------------------------------------------------
+// @param - the tag, one or more blanks, then an identifier
+// ----------------------------------------------------------------------------
+PARAM
+  : "@param" (' ')+ ID
+  ;
+
+// ----------------------------------------------------------------------------
+// @exception - the tag, one or more blanks, then an identifier
+// ----------------------------------------------------------------------------
+EXCEPTION
+  : "@exception" (' ')+ ID
+  ;
+
+// ----------------------------------------------------------------------------
+// Identifier (protected: only used from PARAM and EXCEPTION)
+// ----------------------------------------------------------------------------
+protected ID
+  : ('a'..'z')+
+  ;
+
+// ----------------------------------------------------------------------------
+// Star
+//
+// This rule simply prevents JAVADOC_CLOSE from being called for every '*' in
+// a comment. Calling JAVADOC_CLOSE fails for a lone '*' and raises an
+// exception, which is slow. In other words, the grammar will work without
+// this rule, but is slower.
+// ----------------------------------------------------------------------------
+STAR
+  : '*' { _ttype := TT_SKIP; }
+  ;
+
+// ----------------------------------------------------------------------------
+// JavaDocClose - end of the comment: pop the selector
+// ----------------------------------------------------------------------------
+JAVADOC_CLOSE
+  : "*/" { Selector.Pop; }
+  ;
+
+// ----------------------------------------------------------------------------
+// NewLine - CR LF, CR or LF; counted and skipped
+// ----------------------------------------------------------------------------
+NEWLINE
+  :
+    (
+      '\r' '\n'
+    | '\r'
+    | '\n'
+    )
+    {
+      newLine;
+      _ttype := TT_SKIP;
+    }
+  ;
\ No newline at end of file
diff --git a/doc/grammars/multiLexer/javaLexer.g b/doc/grammars/multiLexer/javaLexer.g
new file mode 100644
index 0000000..caff66b
--- /dev/null
+++ b/doc/grammars/multiLexer/javaLexer.g
@@ -0,0 +1,71 @@
+unit JavaLexer;
+
+uses
+{
+  dpgTokenStreamSelector;
+}
+
+lexer TJavaLexer;
+options
+{
+  k = 2;
+  importVocab = JavaDoc;
+  exportVocab = Java;
+}
+
+tokens
+{
+  "int";
+}
+
+memberdecl
+{
+  public
+    Selector : IdpgTokenStreamSelector;
+}
+
+// ----------------------------------------------------------------------------
+// Simple tokens
+// ----------------------------------------------------------------------------
+SEMI : ';';
+
+// ----------------------------------------------------------------------------
+// JavaDocOpen - start of a comment: push the 'docLexer' stream
+// ----------------------------------------------------------------------------
+JAVADOC_OPEN
+  : "/**" { Selector.Push('docLexer'); }
+  ;
+
+// ----------------------------------------------------------------------------
+// Identifier - checked against the literals in the tokens section
+// ----------------------------------------------------------------------------
+ID
+options
+{
+  testLiterals = true;
+}
+  : ('a'..'z')+
+  ;
+
+// ----------------------------------------------------------------------------
+// White space - blank, tab or line break; skipped
+// ----------------------------------------------------------------------------
+WS
+  :
+    (
+      ' '
+    | '\t'
+    |
+      (
+        '\r' '\n'
+      | '\r'
+      | '\n'
+      )
+      {
+        newLine;
+      }
+    )
+    {
+      _ttype := TT_SKIP;
+    }
+  ;
\ No newline at end of file
diff --git a/doc/grammars/multiLexer/javaParser.g 
b/doc/grammars/multiLexer/javaParser.g new file mode 100644 index 0000000..f46b339 --- /dev/null +++ b/doc/grammars/multiLexer/javaParser.g @@ -0,0 +1,20 @@ +unit javaParser; + +parser TJavaParser; +options +{ + k = 2; + importVocab = Java; +} + +input + : ( (javadoc)? "int" ID SEMI)+ + ; + +javadoc + : + JAVADOC_OPEN + (PARAM)? + (EXCEPTION)? + JAVADOC_CLOSE + ; \ No newline at end of file diff --git a/doc/grammars/multiLexer/javaParser.pas b/doc/grammars/multiLexer/javaParser.pas new file mode 100644 index 0000000..8754de0 --- /dev/null +++ b/doc/grammars/multiLexer/javaParser.pas @@ -0,0 +1,91 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.0.118r +// Grammar: javaparser.g +// ============================================================================ +unit javaParser; + +interface + +uses + Classes, + Contnrs, + dpgLLkParser, + dpgToken, + dpgTypes, + javaParserTokens, + SysUtils; + +type + // ========================================================================= + // Class TJavaParser declaration + // ========================================================================= + TJavaParser = class( TdpgLLkParser) + + public // Public grammar rules + procedure input ; + procedure javadoc ; + + end; + +implementation +uses + dpgException, + dpgExceptionSemantic, + dpgExceptionMismatchedToken; + +// ============================================================================ +// input +// ============================================================================ +procedure TJavaParser.input; +var + _cnt_4: integer; + +begin + _cnt_4 := 0; + + while(true) do + begin + if (( LA(1) in [LT_int,TT_JAVADOC_OPEN])) then + begin + if (( LA(1) in [TT_JAVADOC_OPEN])) then + begin + javadoc; + end; + match(LT_int); + match(TT_ID); + match(TT_SEMI); + end + + else + begin + if 
_cnt_4 >= 1 then + break + else + Raise EdpgMismatchedToken.Create( LT(1), [LT_int,TT_JAVADOC_OPEN], FileName); + end; + + INC(_cnt_4); + end; +end; + +// ============================================================================ +// javadoc +// ============================================================================ +procedure TJavaParser.javadoc; +begin + + match(TT_JAVADOC_OPEN); + if (( LA(1) in [TT_PARAM])) then + begin + match(TT_PARAM); + end; + if (( LA(1) in [TT_EXCEPTION])) then + begin + match(TT_EXCEPTION); + end; + match(TT_JAVADOC_CLOSE); +end; + +end. diff --git a/doc/grammars/multiLexer/javaParserTokens.pas b/doc/grammars/multiLexer/javaParserTokens.pas new file mode 100644 index 0000000..86767a1 --- /dev/null +++ b/doc/grammars/multiLexer/javaParserTokens.pas @@ -0,0 +1,25 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.0.118r +// Grammar: javaparser.g +// ============================================================================ +unit javaParserTokens; + +interface + +const + TT_EOF = 1; + TT_PARAM = 4; + TT_EXCEPTION = 5; + TT_ID = 6; + TT_STAR = 7; + TT_JAVADOC_CLOSE = 8; + TT_NEWLINE = 9; + LT_int = 10; + TT_SEMI = 11; + TT_JAVADOC_OPEN = 12; + TT_WS = 13; + +implementation +end. 
diff --git a/doc/grammars/multiLexer/javaParserTokens.txt b/doc/grammars/multiLexer/javaParserTokens.txt new file mode 100644 index 0000000..b11fbb9 --- /dev/null +++ b/doc/grammars/multiLexer/javaParserTokens.txt @@ -0,0 +1,13 @@ +// $Delphi Parser Generator: javaParser.pas -> TJavaParserTokens.txt$ +TJavaParser +TT_EOF=1 +TT_PARAM=4 +TT_EXCEPTION=5 +TT_ID=6 +TT_STAR=7 +TT_JAVADOC_CLOSE=8 +TT_NEWLINE=9 +LT_int="int"=10 +TT_SEMI=11 +TT_JAVADOC_OPEN=12 +TT_WS=13 diff --git a/doc/grammars/multiLexer/test.in b/doc/grammars/multiLexer/test.in new file mode 100644 index 0000000..b50f9fd --- /dev/null +++ b/doc/grammars/multiLexer/test.in @@ -0,0 +1,20 @@ +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int abc; + +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int zzz; + +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int xxx; diff --git a/doc/grammars/pascal/readme.txt b/doc/grammars/pascal/readme.txt new file mode 100644 index 0000000..23d3a5a --- /dev/null +++ b/doc/grammars/pascal/readme.txt @@ -0,0 +1,10 @@ +To build a project you must first compile the grammars. + +After the compilation the project can be opened in delphi. Be sure that the dpg +runtime library is in the delphi library path. (In the project settings, +or in the environment settings). + +Have fun... + +ps.: I'm not sure that the grammar is correct, or not... + This is only for showing dpg features... \ No newline at end of file diff --git a/doc/grammars/pascal/tools/ast/ast.pas b/doc/grammars/pascal/tools/ast/ast.pas new file mode 100644 index 0000000..58e3b52 --- /dev/null +++ b/doc/grammars/pascal/tools/ast/ast.pas @@ -0,0 +1,359 @@ +{ + * A Child-Sibling Tree. 
+ * + * A tree with PLUS at the root and with two children 3 and 4 is + * structured as: + * + * PLUS + * | + * 3 -- 4 + * + * and can be specified easily in LISP notation as + * + * (PLUS 3 4) + * + * where every '(' starts a new subtree. + * + * These trees are particular useful for translators because of + * the flexibility of the children lists. They are also very easy + * to walk automatically, whereas trees with specific children + * reference fields can't easily be walked automatically. + * + * This class contains the basic support for an AST. + * Most people will create ASTs that are subclasses of + * BaseAST or of CommonAST. + */ +} +unit ast; + +interface +uses + Classes, + Generics.Collections; +// dpgrtl.Token; + + +type + TAST = class; + TASTList = TList; + + TAST = class + protected + fDown : TAST; + fRight : TAST; + + fAstText : AnsiString; + fAstType : integer; + fAstLine : integer; + fAstColumn : integer; + + private + fVerbose : boolean; // verbose string conversion + fTokenNames : TStringList; + + private + procedure DoFindAll( NodeToSearch : TAST; + // v : Vector; + Target : TAST; + PartialMatch : boolean); + + protected + function GetNumberofChildren: integer; + + function GetEquals( Node : TAST): boolean; virtual; + function GetEqualsList( Node : TAST): boolean; virtual; + function GetEqualsListPartial( Node : TAST): boolean; virtual; + function GetEqualsTree( Node : TAST): boolean; virtual; + function GetEqualsTreePartial( Node : TAST): boolean; virtual; + + function GetTokenNames : TStringList; + + procedure SetVerbose( Verbose : boolean; + Names : TStringList); + + public + procedure Initialize( AstType : integer; + AstText : AnsiString); overload; virtual; abstract; + + procedure Initialize( Node : TAST); overload; virtual; abstract; +// procedure Initialize( Token : TToken); overload; virtual; abstract; + + procedure AddChild( node: TAST); + procedure RemoveChildren; + + public + procedure AfterConstruction; override; + procedure 
BeforeDestruction; override; + + public + property NumberOfChildren : integer read GetNumberofChildren; + + property NextSibling : TAST read fRight write fRight; + property FirstChild : TAST read fDown write fDown; + + property AstText : AnsiString read fAstText write fAstText; + property AstType : integer read fAstType write fAstType; + property AstLine : integer read fAstLine write fAstLine; + property AstColumn : integer read fAstColumn write fAstColumn; + + property Equals [n: TAST]: boolean read GetEquals; + property EqualsList [n: TAST]: boolean read GetEqualsList; + property EqualsListPartial [n: TAST]: boolean read GetEqualsListPartial; + property EqualsTree [n: TAST]: boolean read GetEqualsTree; + property EqualsTreePartial [n: TAST]: boolean read GetEqualsTreepartial; + end; + + TASTClass = class of TAST; + + +implementation + +{ TastBase } + +procedure TAST.AfterConstruction; +begin + inherited; + fTokenNames := TStringList.Create; +end; + +procedure TAST.BeforeDestruction; +begin + fTokenNames.Free; + inherited; +end; + +// ================================================================================================ +// DoFindAll +// ================================================================================================ +procedure TAST.DoFindAll(NodeToSearch, Target: TAST; PartialMatch: boolean); +var + sibling : TAST; + child : TAST; + +begin + // start walking sibling lists, looking for matches + sibling := NodeToSearch; + + while Assigned(sibling) do + begin + if ( PartialMatch and sibling.EqualsTreePartial[Target]) or + ( not PartialMatch and sibling.EqualsTree[Target]) then + // v.appendelement(sibling) + ; + + if Assigned( sibling.FirstChild) then + DoFindAll( sibling.FirstChild, {v,} Target, PartialMatch); + + sibling := sibling.NextSibling + end; + +end; + + +// ================================================================================================ +// Get Number of Children +// 
// ================================================================================================
// Counts the direct children of this node by walking the sibling chain that starts at fDown.
function TAST.GetNumberofChildren: integer;
var
  child: TAST;

begin
  result := 0;
  child  := fDown;

  while Assigned(child) do
  begin
    INC(result);
    child := child.fRight;
  end;
end;

// ================================================================================================
// Get Equals
//
// Two nodes are equal when both their token type and their text agree.
// A nil Node never equals anything.
// ================================================================================================
function TAST.GetEquals(Node: TAST): boolean;
begin
  result := Assigned(Node)
        and (Node.fAstType = fAstType)
        and (Node.fAstText = fAstText);
end;

// ================================================================================================
// Get Equals List
//
// Full structural comparison of the sibling list starting at *self* with the
// sibling list starting at *Node*: every pair of siblings must be equal, have
// equal child lists, and both lists must end at the same time.
// ================================================================================================
function TAST.GetEqualsList(Node: TAST): boolean;
var
  mine   : TAST;
  theirs : TAST;

begin
  result := false;

  if not Assigned(Node) then
    exit;

  mine   := self;
  theirs := Node;

  while Assigned(mine) and Assigned(theirs) do
  begin
    // as a quick optimization, check roots first
    if not mine.Equals[theirs] then
      exit;

    if Assigned(mine.FirstChild) then
    begin
      // roots match: the child lists have to match completely as well
      if not mine.FirstChild.EqualsList[theirs.FirstChild] then
        exit;
    end
    // this node has no kids, so the other one must not have any either
    else if Assigned(theirs.FirstChild) then
      exit;

    mine   := mine.NextSibling;
    theirs := theirs.NextSibling;
  end;

  // equal only when both lists ran out together
  result := not Assigned(mine) and not Assigned(theirs);
end;

// ================================================================================================
// Get Equals List Partial
//
// Is Node a subtree of this list ? The siblings of the root are NOT ignored.
// ================================================================================================
// Walks the sibling list starting at *self* in lock-step with the list
// starting at *Node*. Returns true when every node of *Node*'s list (and,
// recursively, of its child lists) is matched; *self*'s list may be longer.
// A nil *Node* (the empty pattern) matches any list.
//
// Fixes over the previous version:
//   - the loop now advances both cursors (it used to spin forever on a match)
//   - a root mismatch now yields false (it used to fall through to true)
function TAST.GetEqualsListPartial(Node: TAST): boolean;
var
  sibling: TAST;

begin
  // the empty tree is always a subset of any tree
  result := true;

  if not Assigned(Node) then
    exit;

  result  := false;
  sibling := self;

  while Assigned(sibling) and Assigned(Node) do
  begin
    // as a quick optimization, check roots first
    if not sibling.Equals[Node] then
      exit;

    // if roots match, do partial list match test on children
    if Assigned( sibling.FirstChild) then
      if not sibling.FirstChild.EqualsListPartial[Node.FirstChild] then
        exit;

    sibling := sibling.NextSibling;
    Node    := Node.NextSibling;
  end;

  // Pattern exhausted -> match (this list may still have siblings left).
  // Pattern left over  -> this list was too short, no match.
  result := not Assigned(Node);
end;

// ================================================================================================
// Get Equals Tree
//
// Is the tree rooted at *self* equals to *Node* ?
// The sibling of *self* are ignored.
//
// Returns false for a nil *Node* and whenever the roots differ (the previous
// version reported true in both cases).
// ================================================================================================
function TAST.GetEqualsTree(Node: TAST): boolean;
begin
  result := false;

  // Equals[] is false for nil, so Node is safe to dereference below
  if not Equals[Node] then
    exit;

  // if roots match, do full list match test on children
  if Assigned(FirstChild) then
  begin
    if not FirstChild.EqualsList[Node.FirstChild] then
      exit;
  end

  // No kids, make sure *Node* hasn't either
  else if Assigned(Node.FirstChild) then
    exit;

  result := true
end;

// ================================================================================================
// Get Equals Tree Partial
//
// Is *Node* a sub-tree of the tree rooted at *self* ? The roots must be equal
// and *Node*'s children must partially match *self*'s children. Siblings of
// *self* are ignored. A nil *Node* matches any tree.
//
// The previous version returned true on a root mismatch and compared the
// children against *Node* itself instead of *Node.FirstChild*.
// ================================================================================================
function TAST.GetEqualsTreePartial(Node: TAST): boolean;
begin
  // the empty tree is always a subset of any tree
  result := true;

  if not Assigned(Node) then
    exit;

  result := false;

  if not Equals[Node] then
    exit;

  if Assigned(FirstChild) then
    if not FirstChild.EqualsListPartial[Node.FirstChild] then
      exit;

  result := true
end;

// ================================================================================================
// Get Token Names
+// ================================================================================================ +function TAST.GetTokenNames: TStringList; +begin + result := TStringList.Create; + result.AddStrings(fTokenNames); +end; + +// ================================================================================================ +// Set Verbose +// ================================================================================================ +procedure TAST.SetVerbose(Verbose: boolean; Names: TStringList); +begin + fVerbose := Verbose; + fTokenNames.Clear; + ftokenNames.AddStrings(Names); +end; + +// ================================================================================================ +// AddChild +// ================================================================================================ +procedure TAST.AddChild(node: TAST); +var + n: TAST; + +begin + if Assigned(node) then + begin + n := fDown; + + if Assigned(n) then + begin + while Assigned(n.fRight) do + n := n.fRight; + + n.fRight := node + end + + else + fDown := node + end; +end; + +// ================================================================================================ +// Remove Children +// ================================================================================================ +procedure TAST.RemoveChildren; +begin + fDown := nil +end; + + +end. + diff --git a/doc/grammars/pascal/tools/ast/astProgram.pas b/doc/grammars/pascal/tools/ast/astProgram.pas new file mode 100644 index 0000000..75a9da0 --- /dev/null +++ b/doc/grammars/pascal/tools/ast/astProgram.pas @@ -0,0 +1,12 @@ +unit astProgram; + +interface + +type + TastProgram = class + + end; + +implementation + +end. 
diff --git a/doc/grammars/pascal/wirth/block.gif b/doc/grammars/pascal/wirth/block.gif new file mode 100644 index 0000000..66f0d1f Binary files /dev/null and b/doc/grammars/pascal/wirth/block.gif differ diff --git a/doc/grammars/pascal/wirth/expr.gif b/doc/grammars/pascal/wirth/expr.gif new file mode 100644 index 0000000..62d0daf Binary files /dev/null and b/doc/grammars/pascal/wirth/expr.gif differ diff --git a/doc/grammars/pascal/wirth/idconst.gif b/doc/grammars/pascal/wirth/idconst.gif new file mode 100644 index 0000000..2ba07c4 Binary files /dev/null and b/doc/grammars/pascal/wirth/idconst.gif differ diff --git a/doc/grammars/pascal/wirth/stmt.gif b/doc/grammars/pascal/wirth/stmt.gif new file mode 100644 index 0000000..a0c086a Binary files /dev/null and b/doc/grammars/pascal/wirth/stmt.gif differ diff --git a/doc/grammars/pascal/wirth/test/wpTest.dpr b/doc/grammars/pascal/wirth/test/wpTest.dpr new file mode 100644 index 0000000..feffe20 --- /dev/null +++ b/doc/grammars/pascal/wirth/test/wpTest.dpr @@ -0,0 +1,38 @@ +program wpTest; + +{$APPTYPE CONSOLE} + +uses + Classes, + SysUtils, + wpLex in '..\wpLex.pas', + wpPar in '..\wpPar.pas', + astProgram in '..\..\tools\ast\astProgram.pas', + ast in '..\..\tools\ast\ast.pas'; + +var + stm : TFileStream; + lex : TwpLex; + par : TwpPar; + +begin + try + stm := TFileStream.Create('x.pas', fmOpenRead); + stm.Seek(0, soFromBeginning); + + lex := TwpLex.Create(stm); + par := TwpPar.Create(lex); + + par.prg; + + stm.Free; + lex.Free; + par.Free; + + + + except + on E: Exception do + Writeln(E.ClassName, ': ', E.Message); + end; +end. 
diff --git a/doc/grammars/pascal/wirth/test/wpTest.dproj b/doc/grammars/pascal/wirth/test/wpTest.dproj new file mode 100644 index 0000000..2382475 --- /dev/null +++ b/doc/grammars/pascal/wirth/test/wpTest.dproj @@ -0,0 +1,107 @@ + + + {E509B25A-366D-449B-B4C6-013162829AAF} + 12.0 + wpTest.dpr + Debug + DCC32 + + + true + + + true + Base + true + + + true + Base + true + + + wpTest.exe + 00400000 + WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;$(DCC_UnitAlias) + x86 + false + false + false + false + false + + + false + RELEASE;$(DCC_Define) + 0 + false + + + DEBUG;$(DCC_Define) + + + + MainSource + + + + + + + Base + + + Cfg_2 + Base + + + Cfg_1 + Base + + + + + Delphi.Personality.12 + + + + + wpTest.dpr + + + False + True + False + + + False + False + 1 + 0 + 0 + 0 + False + False + False + False + False + 1031 + 1252 + + + + + 1.0.0.0 + + + + + + 1.0.0.0 + + + + + 12 + + diff --git a/doc/grammars/pascal/wirth/test/x.pas b/doc/grammars/pascal/wirth/test/x.pas new file mode 100644 index 0000000..9ee1f08 --- /dev/null +++ b/doc/grammars/pascal/wirth/test/x.pas @@ -0,0 +1,13 @@ +program prg1 (input,output); + +label 1,4,5; + +const + a = 3.14; + b = 'hello'; + c = d; + asd = 12345; + +begin + +end. 
diff --git a/doc/grammars/pascal/wirth/type.gif b/doc/grammars/pascal/wirth/type.gif new file mode 100644 index 0000000..dc5a50b Binary files /dev/null and b/doc/grammars/pascal/wirth/type.gif differ diff --git a/doc/grammars/pascal/wirth/var.gif b/doc/grammars/pascal/wirth/var.gif new file mode 100644 index 0000000..24655ce Binary files /dev/null and b/doc/grammars/pascal/wirth/var.gif differ diff --git a/doc/grammars/pascal/wirth/wpLex.pas b/doc/grammars/pascal/wirth/wpLex.pas new file mode 100644 index 0000000..4a2d8de --- /dev/null +++ b/doc/grammars/pascal/wirth/wpLex.pas @@ -0,0 +1,607 @@ +unit wpLex; + +interface +uses + Classes, + SysUtils, + Generics.Collections; + + +type + TTokenType = + ( + TT_EOF, + TT_SKIP, + TT_COMMENT, + + TT_LPAREN, // ( + TT_RPAREN, // ) + TT_LBRACKET, // [ + TT_RBRACKET, // ] + + TT_STAR, // * + TT_SLASH, // / + TT_PLUS, // + + TT_MINUS, // - + + TT_LT, // < + TT_LE, // <= + TT_GT, // > + TT_GE, // >= + + TT_EQ, // = + TT_NE, // <> + + TT_COLON, // : + TT_ASSIGN, // := + + TT_DOT, // . + TT_RANGE, // .. 
+ TT_PTR, // ^ + TT_COMMA, // , + TT_SEMI, // ; + TT_DOLLAR, // $ + TT_AT, // @ + TT_SHARP, // # + + TT_ID, + TT_UINT, + TT_UREAL, + TT_CHAR, + TT_STRING, + TT_HEX, + TT_BIN, + + LT_DO, + LT_IF, + LT_IN, + LT_OF, + LT_OR, + LT_TO, + + LT_AND, + LT_DIV, + LT_END, + LT_FOR, + LT_MOD, + LT_NIL, + LT_NOT, + LT_SET, + LT_VAR, + LT_XOR, + + LT_CASE, + LT_ELSE, + LT_FILE, + LT_GOTO, + LT_THEN, + LT_TYPE, + LT_USES, + LT_WITH, + + LT_ARRAY, + LT_BEGIN, + LT_CONST, + LT_LABEL, + LT_UNTIL, + LT_WHILE, + + LT_DOWNTO, + LT_PACKED, + LT_RECORD, + LT_REPEAT, + + LT_PROGRAM, + LT_FUNCTION, + LT_PROCEDURE + ); + + TTokenTypes = set of TTokenType; + TBlah = set of byte; + + TToken = class + TokenType : TTokenType; + TokenText : AnsiString; + TokenLine : integer; + TokenColumn : integer; + end; + + TTokenMap = TDictionary; + + + TwpLex = class + private + fBuffer : PAnsiChar; + fStart : PAnsiChar; + fForward : PAnsiChar; + + fLiterals : TTokenMap; + + fTokenLine : integer; + fTokenColumn: integer; + + private + procedure InitLiterals; + + function CheckLiteral( TokenText : AnsiString; + TokenType : TTokenType): TTokenType; + + function MakeToken( TokenText : AnsiString; + TokenType : TTokenType): TToken; + + public + function NextToken : TToken; + + public + constructor Create( Stream: TStream; Length: Int64=-1); + destructor Destroy; override; + end; + + EwpLex = Exception; + + +implementation +uses + Windows; + + +{ TwpLex } + +// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// +// Construction/destruction +// +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// 
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction/destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
//
// Loads the text to scan into a private, #0-terminated buffer.
//
//   Stream : source of the text; read from its current position. When nil,
//            the lexer is set up on an empty buffer (NextToken yields TT_EOF).
//   Length : number of bytes to read; a negative value (the default) means
//            "everything from the current position to the end of the stream".
// ================================================================================================
constructor TwpLex.Create(Stream: TStream; Length: Int64);
var
  size   : Int64;
  loaded : Int64;

begin
  inherited Create;

  InitLiterals;

  size := 0;

  if Assigned(Stream) then
  begin
    if Length < 0
      then size := Stream.Size - Stream.Position
      else size := Length;

    // a position beyond the end of the stream would give a negative size
    if size < 0 then
      size := 0;
  end;

  // one extra byte for the #0 sentinel the scanner relies on
  fBuffer := GetMemory(size+1);
  loaded  := 0;

  if size > 0 then
    loaded := Stream.Read( fBuffer^, size);

  if loaded < 0 then
    loaded := 0;

  // terminate after the bytes actually read, not after the bytes requested
  fBuffer[loaded] := #0;

  fStart   := fBuffer;
  fForward := fBuffer;
end;

// ================================================================================================
// Destructor
//
// Releases the keyword table and the text buffer allocated by the constructor.
// ================================================================================================
destructor TwpLex.Destroy;
begin
  FreeAndNil(fLiterals);

  // FreeMemory accepts nil, so this is safe even if construction failed early
  FreeMemory(fBuffer);
  fBuffer  := nil;
  fStart   := nil;
  fForward := nil;

  inherited;
end;

// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Make Token
//
// Creates a new TToken carrying the given text and type. The caller owns the
// returned object. Line and column are always reported as 0 here.
// ================================================================================================
function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken;
begin
  result := TToken.Create;
  result.TokenLine   := 0;
  result.TokenColumn := 0;
  result.TokenType   := TokenType;
  result.TokenText   := TokenText;

//  if TokenType = TT_COMMENT
//    then result.TokenText := TokenText
//    else result.TokenText := UpperCase(TokenText);
end;

// ================================================================================================
// Init Literals
//
// Builds the keyword table: maps each reserved word (spelled in lower case)
// to its LT_* token type.
// ================================================================================================
procedure TwpLex.InitLiterals;
begin
  fLiterals := TTokenMap.Create;

  fLiterals.Add('do', LT_DO);
  fLiterals.Add('if', LT_IF);
  fLiterals.Add('in', LT_IN);
  fLiterals.Add('of', LT_OF);
  fLiterals.Add('or', LT_OR);
  fLiterals.Add('to', LT_TO);

  fLiterals.Add('and', LT_AND);
  fLiterals.Add('div', LT_DIV);
  fLiterals.Add('end', LT_END);
  fLiterals.Add('for', LT_FOR);
  fLiterals.Add('mod', LT_MOD);
  fLiterals.Add('nil', LT_NIL);
  fLiterals.Add('not', LT_NOT);
  fLiterals.Add('set', LT_SET);
  fLiterals.Add('var', LT_VAR);
  fLiterals.Add('xor', LT_XOR);

  fLiterals.Add('case', LT_CASE);
  fLiterals.Add('else', LT_ELSE);
  fLiterals.Add('file', LT_FILE);
  fLiterals.Add('goto', LT_GOTO);
  fLiterals.Add('then', LT_THEN);
  fLiterals.Add('type', LT_TYPE);
  fLiterals.Add('uses', LT_USES);
  fLiterals.Add('with', LT_WITH);

  fLiterals.Add('array', LT_ARRAY);
  fLiterals.Add('begin', LT_BEGIN);
  fLiterals.Add('const', LT_CONST);
  fLiterals.Add('label', LT_LABEL);
  fLiterals.Add('until', LT_UNTIL);
  fLiterals.Add('while', LT_WHILE);

  fLiterals.Add('downto', LT_DOWNTO);
  fLiterals.Add('packed', LT_PACKED);
  fLiterals.Add('record', LT_RECORD);
  fLiterals.Add('repeat', LT_REPEAT);

  fLiterals.Add('program', LT_PROGRAM);
  fLiterals.Add('function', LT_FUNCTION);
  fLiterals.Add('procedure', LT_PROCEDURE);
end;

// ================================================================================================
// Check Literal
// ================================================================================================
// Returns the keyword token type when TokenText is a reserved word, otherwise
// the TokenType passed in. The keyword table is keyed in lower case, so the
// text is lower-cased for the lookup only (the token text itself is untouched).
function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType;
var
  ttype : TTokenType;

begin
  if fLiterals.TryGetValue(AnsiString(LowerCase(string(TokenText))), ttype)
    then result := ttype
    else result := TokenType
end;

// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Interface
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Next Token
//
// Scans the next token starting at fStart and returns it as a new TToken that
// the caller owns. White space (tab, CR, LF, blank) is skipped; comments are
// returned as TT_COMMENT tokens; the end of the buffer yields TT_EOF with an
// empty text. Raises EwpLex on malformed input.
//
// Fixes over the previous version:
//   - "(* ... *)" comments no longer loop forever on ordinary characters
//   - "1..5" now yields UINT "1" followed by RANGE ".." (was RANGE "1.")
//   - the token text is copied with SetString, which is safe for empty text
// ================================================================================================
function TwpLex.NextToken: TToken;

  // text between fStart (inclusive) and fForward (exclusive)
  function GetTokenText: AnsiString;
  begin
    SetString(result, fStart, fForward-fStart);
  end;

var
  ttext : AnsiString;
  ttype : TTokenType;

begin
  result := nil;

  while true do
  begin
    result   := nil;
    fForward := fStart;

    case fForward^ of

      // id
      'a'..'z','A'..'Z','_':
        begin
          INC(fForward);
          ttype := TT_ID;

          while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do
            INC(fForward);

          ttype := CheckLiteral( GetTokenText, ttype);
        end;

      // uint or ureal
      '0'..'9':
        begin
          INC(fForward);
          ttype := TT_UINT;

          while fForward^ in ['0'..'9'] do
            INC(fForward);

          // fractional part
          if fForward^ = '.' then
          begin
            INC(fForward);

            if fForward^ in ['0'..'9'] then
            begin
              INC(fForward);
              ttype := TT_UREAL;

              while fForward^ in ['0'..'9'] do
                INC(fForward);
            end

            // "n..": the dot belongs to a range operator, not to the number;
            // give it back so the next call scans ".." as TT_RANGE
            else if fForward^ = '.' then
              DEC(fForward)

            else
              raise EwpLex.Create('Expected 0..9 in fractional part');
          end;

          // exponential part
          if fForward^ in ['e','E'] then
          begin
            INC(fForward);
            ttype := TT_UREAL;

            if fForward^ in ['+','-'] then
              INC(fForward);

            if fForward^ in ['0'..'9'] then
            begin
              INC(fForward);

              while fForward^ in ['0'..'9'] do
                INC(fForward);
            end
            else
              raise EwpLex.Create('Expected +,-,0..9 in exponential part');
          end;
        end;

      // <,<=,<>
      '<':
        begin
          INC(fForward);
          ttype := TT_LT;

          if fForward^ in ['=','>'] then
          begin
            case fForward^ of
              '=': ttype := TT_LE;
              '>': ttype := TT_NE;
            end;

            INC(fForward);
          end;
        end;

      // >,>=
      '>':
        begin
          INC(fForward);
          ttype := TT_GT;

          if fForward^ = '=' then
          begin
            INC(fForward);
            ttype := TT_GE;
          end;
        end;

      // :,:=
      ':':
        begin
          INC(fForward);
          ttype := TT_COLON;

          if fForward^ = '=' then
          begin
            INC(fForward);
            ttype := TT_ASSIGN
          end
        end;

      // .,..
      '.':
        begin
          INC(fForward);
          ttype := TT_DOT;

          if fForward^ = '.' then
          begin
            INC(fForward);
            ttype := TT_RANGE;
          end
        end;

      // string
      // NOTE(review): every quoted literal is reported as TT_CHAR; TT_STRING
      // is never produced. Left as is because the parser rules that consume
      // it are not visible here - confirm before changing.
      '''':
        begin
          INC(fForward);
          ttype := TT_CHAR;

          while true do
          begin
            if fForward^ in [#10,#13,#0] then
              raise EwpLex.Create('Newline/EOF found in string');

            if fForward^ = '''' then
            begin
              INC(fForward);

              // a doubled quote is an escaped quote inside the literal
              if fForward^ = ''''
                then INC(fForward)
                else break

            end
            else
              INC(fForward)
          end;
        end;

      // /,//
      '/':
        begin
          INC(fForward);
          ttype := TT_SLASH;

          if fForward^ = '/' then
          begin
            INC(fForward);
            ttype := TT_COMMENT;

            while not (fForward^ in [#13,#10,#0]) do
              INC(fForward);
          end
        end;

      // comment
      '{':
        begin
          INC(fForward);
          ttype := TT_COMMENT;

          while not (fForward^ in ['}',#0]) do
            INC(fForward);

          if fForward^ = #0
            then raise EwpLex.Create('EOF reached in comment')
            else INC( fForward);
        end;

      // ( or (* comment *)
      '(':
        begin
          INC(fForward);
          ttype := TT_LPAREN;

          if fForward^ = '*' then
          begin
            INC(fForward);
            ttype := TT_COMMENT;

            while true do
            begin
              if fForward^ = #0 then
                raise EwpLex.Create('EOF reached in comment');

              if fForward^ = '*' then
              begin
                INC(fForward);

                if fForward^ = ')' then
                begin
                  INC(fForward);
                  break;
                end;
              end
              else
                // ordinary comment character: step over it
                INC(fForward)
            end
          end
        end;

      // hex number
      '$':
        begin
          INC(fForward);

          if fForward^ in ['0'..'9','a'..'f','A'..'F'] then
          begin
            INC(fForward);
            ttype := TT_HEX;

            while fForward^ in ['0'..'9','a'..'f','A'..'F'] do
              INC(fForward);
          end

          else
            raise EwpLex.Create('Expected hexadecimal digit');
        end;

      // binary number
      '%':
        begin
          INC(fForward);

          if fForward^ in ['0'..'1'] then
          begin
            INC(fForward);
            ttype := TT_BIN;

            while fForward^ in ['0'..'1'] do
              INC(fForward);
          end

          else
            raise EwpLex.Create('Expected binary digit');
        end;

      else
        // single character tokens, white space and EOF
        // ('$' is not listed: it is always consumed by the hex rule above)
        case fForward^ of
          ')': begin ttype := TT_RPAREN;   INC(fForward) end;
          '[': begin ttype := TT_LBRACKET; INC(fForward) end;
          ']': begin ttype := TT_RBRACKET; INC(fForward) end;

          '*': begin ttype := TT_STAR;     INC(fForward) end;
          '+': begin ttype := TT_PLUS;     INC(fForward) end;
          '-': begin ttype := TT_MINUS;    INC(fForward) end;

          '=': begin ttype := TT_EQ;       INC(fForward) end;

          '^': begin ttype := TT_PTR;      INC(fForward) end;
          ';': begin ttype := TT_SEMI;     INC(fForward) end;
          ',': begin ttype := TT_COMMA;    INC(fForward) end;

          '@': begin ttype := TT_AT;       INC(fForward) end;
          '#': begin ttype := TT_SHARP;    INC(fForward) end;

          #9 : begin ttype := TT_SKIP;     INC(fForward) end;
          #10: begin ttype := TT_SKIP;     INC(fForward) end;
          #13: begin ttype := TT_SKIP;     INC(fForward) end;
          #32: begin ttype := TT_SKIP;     INC(fForward) end;

          // EOF (the cursor stays on the sentinel, so EOF repeats)
          #0 : ttype := TT_EOF;

          else
            raise EwpLex.Create('Invalid character '+fForward^);
        end
    end;

    if ttype <> TT_SKIP then
    begin
      ttext  := GetTokenText;
      result := MakeToken( ttext, ttype);
      fStart := fForward;

      break;
    end;

    // skipped input: continue scanning right behind it
    fStart := fForward;
  end;
end;

end.

diff --git a/doc/grammars/pascal/wirth/wpPar.pas b/doc/grammars/pascal/wirth/wpPar.pas new file mode 100644 index 0000000..9351f8c --- /dev/null +++ b/doc/grammars/pascal/wirth/wpPar.pas @@ -0,0 +1,418 @@ +unit wpPar; + +interface +uses + SysUtils, + Generics.Collections, + wpLex; + +type + TStringMap = TDictionary; + + + TwpPar = class + private + fLex : TwpLex; + + fConstants : TStringMap; + fTypes : TStringMap; + + protected + function Match( ttype : TTokenType; dispose: boolean=true):TToken; overload; + function Match( ttypes : TTokenTypes; dispose: boolean=true):TToken; overload; + + protected + procedure block; + procedure constant; + procedure uconstant; + procedure typedef; + + public + procedure prg; + + public + constructor Create( Lexer: TwpLex); + destructor Destroy; override; + end; + + EwpPar = Exception; + + +implementation + +{ TwpPar } + + + +// @@@: Construction / destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// 
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction / destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
//
// Remembers the lexer to pull tokens from and creates the (initially empty)
// constant and type tables. The lexer is only referenced; this class does
// not free it.
// ================================================================================================
constructor TwpPar.Create(Lexer: TwpLex);
begin
  inherited Create;

  fConstants := TStringMap.Create;
  fTypes     := TStringMap.Create;
  fLex       := Lexer;
end;

// ================================================================================================
// Destructor
//
// Frees the two tables created by the constructor.
// ================================================================================================
destructor TwpPar.Destroy;
begin
  fTypes.Free;
  fConstants.Free;

  inherited;
end;



// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
//
================================================================================================ +function TwpPar.Match(ttype: TTokenType; dispose: boolean): TToken; +var + t: TToken; + +begin + result := nil; + t := fLex.NextToken; + + if t.TokenType = ttype then + if dispose + then t.Free + else result := t + + else + raise EwpPar.Create('Unexpected token') +end; + +// ================================================================================================ +// +// ================================================================================================ +function TwpPar.Match(ttypes: TTokenTypes; dispose: boolean): TToken; +var + t: TToken; + +begin + result := nil; + t := fLex.NextToken; + + if t.TokenType in ttypes then + if dispose + then t.Free + else result := t + else + raise EwpPar.Create('Unexpected token') +end; + +// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// +// Interface +// +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// ================================================================================================ +// Program +// ================================================================================================ +procedure TwpPar.prg; +var + t : TToken; + +begin + if Assigned( fLex) then + begin + Match(LT_PROGRAM); + Match(TT_ID); + Match(TT_LPAREN); + + t := fLex.NextToken; + + // id [, id]* + if t.TokenType = TT_ID then + begin + t.Free; + t := fLex.NextToken; + + while t.TokenType = TT_COMMA do + begin + 
t.Free; + Match(TT_ID); + t := fLex.NextToken; + end; + end; + + if t.TokenType <> TT_RPAREN then + raise EwpPar.Create('")" expected'); + + Match(TT_SEMI); + block; + Match(TT_DOT) + end; +end; + + +// ================================================================================================ +// block +// ================================================================================================ +procedure TwpPar.block; +var + token : TToken; + ttype : TTokenType; + start : TTokenTypes; + start2: TTokenTypes; + +begin + start := [LT_LABEL,LT_CONST,LT_TYPE,LT_VAR,LT_PROCEDURE,LT_FUNCTION,LT_BEGIN]; + start2:= [LT_LABEL,LT_CONST,LT_TYPE,LT_VAR,LT_PROCEDURE,LT_FUNCTION]; + token := fLex.NextToken; + + if token.TokenType in start then + begin + while token.TokenType in start2 do + begin + case token.TokenType of + // ------------------------------------------------------ + // Label + // ------------------------------------------------------ + LT_LABEL: + while true do + begin + Match( TT_UINT); + + token := fLex.NextToken; + ttype := token.TokenType; + + token.Free; + + case ttype of + TT_COMMA : ; + TT_SEMI : begin token := fLex.NextToken; break end; + else raise EwpPar.Create('Expected: ,;'); + end; + end; + + // ------------------------------------------------------ + // Const + // ------------------------------------------------------ + LT_CONST: + begin + token := fLex.NextToken; + ttype := token.TokenType; + + while ttype = TT_ID do + begin + token.Free; + Match(TT_EQ); + constant; + Match(TT_SEMI); + + token := flex.NextToken; + ttype := token.TokenType; + end; + end; + + // ------------------------------------------------------ + // Type + // ------------------------------------------------------ + LT_TYPE: + begin + token := fLex.NextToken; + ttype := token.TokenType; + + while ttype = TT_ID do + begin + token.Free; + Match(TT_EQ); + typedef; + Match(TT_SEMI); + + token := flex.NextToken; + ttype := token.TokenType; + end; + end; + + LT_VAR: + 
begin + + end; + + LT_PROCEDURE: + begin + + end; + + LT_FUNCTION: + begin + + end; + end; + +// token := fLex.NextToken + end; + + if token.TokenType = LT_BEGIN then + begin + + Match(LT_END) + end + + else + raise EwpPar.Create('Expected: Begin'); + end + + else + raise EwpPar.Create('Expected: label, const, type, var, procedure, function, begin'); +end; + + +// ================================================================================================ +// Constant +// ================================================================================================ +procedure TwpPar.constant; +var + token: TToken; + ttype: TTokenType; + +begin + token := fLex.NextToken; + ttype := token.TokenType; + + if ttype in [TT_PLUS,TT_MINUS,TT_ID,TT_UINT,TT_UREAL] then + begin + if ttype in [TT_PLUS,TT_MINUS] then + begin + token.Free; + + token := fLex.NextToken; + ttype := token.TokenType; + end; + + token.Free; + + case ttype of + TT_ID : ; + TT_UINT : ; + TT_UREAL : ; + else raise EwpPar.Create('Expected: id,int,real') + end + end + + else if ttype in [TT_CHAR, TT_STRING] then + begin + token.Free; + end + + else + raise EwpPar.Create('Expected: +,-,id,int,real,string'); +end; + +// ================================================================================================ +// Unsigned constant +// ================================================================================================ +procedure TwpPar.uconstant; +var + token: TToken; + ttype: TTokenType; + +begin + token := fLex.NextToken; + ttype := token.TokenType; + + token.Free; + + case token.TokenType of + TT_ID : ; + TT_UINT : ; + TT_UREAL : ; + LT_NIL : ; + TT_STRING: ; + else raise EwpPar.Create('Expected: id,int,real,nil,string') + end; +end; + +// ================================================================================================ +// Type +// ================================================================================================ +procedure TwpPar.typedef; +var + 
token: TToken; + ttype: TTokenType; + ttext: AnsiString; + + f_const : TTokenTypes; + +begin + token := fLex.NextToken; + ttype := token.TokenType; + ttext := token.TokenText; + + token.Free; + + // --------------------------------------------------------------- + // TT_ID + // + // Identifier can be an existing type identifier, or an existing + // constant identifier. If it is contant identifier, then it must + // be a range specification. + // --------------------------------------------------------------- + if ttype = TT_ID then + begin + // -------------------------------------------------- + // constant .. constant + // -------------------------------------------------- + if fConstants.ContainsKey(ttext) then + begin + Match(TT_RANGE); + + token := fLex.NextToken; + ttype := token.TokenType; + ttext := token.TokenText; + + token.Free; + end + + // -------------------------------------------------- + // type identifier + // -------------------------------------------------- + else if fTypes.ContainsKey(ttext) then + begin + + end + + // -------------------------------------------------- + // Not an constant or type identifier + // -------------------------------------------------- + else + EwpPar.Create('Expected a type or constant identifier' ); + end; + + +end; + + + +end. 
diff --git a/doc/grammars/pascal/wp.dpr b/doc/grammars/pascal/wp.dpr new file mode 100644 index 0000000..8cea72a --- /dev/null +++ b/doc/grammars/pascal/wp.dpr @@ -0,0 +1,42 @@
// ============================================================================
// Console driver: parses the Pascal source file named on the command line
// with the generated TwpLexer / TwpParser and reports the first error.
// ============================================================================
program wp;

{$APPTYPE CONSOLE}

uses
  Classes,
  SysUtils,
  dpgRTL,

  wpParser in 'wpParser.pas',
  wpLexer in 'wpLexer.pas';

var
  stm: TFileStream;
  lex: TwpLexer;
  par: TwpParser;

begin
  if ParamCount <> 1 then
  begin
    writeln('usage: wp ');
    exit;
  end;

  stm := nil;
  par := nil;

  try
    stm := TFileStream.Create( ParamStr(1), fmOpenRead);
    lex := TwpLexer.Create( stm);
    par := TwpParser.Create(lex);

    par.prog;
  except
    // A missing or unreadable input file is not a syntax error.
    on e: EFOpenError         do writeln('Cannot open file: ' + e.Message);
    on e: EdpgMismatchedChar  do writeln('SyntaxError: ' + IntToStr(e.Line));
    on e: EdpgMismatchedToken do writeln('SyntaxError: ' + IntToStr(e.FoundToken.TokenLine));
    else writeln('Syntax error');
  end;

  // Release the parser before the stream it was reading from.
  // NOTE(review): lex is not freed here; ownership of the lexer is not
  // visible from this file - confirm against TwpParser / the dpg runtime.
  if par <> nil then par.free;
  if stm <> nil then stm.free;
end.
diff --git a/doc/grammars/pascal/wpLexer.g b/doc/grammars/pascal/wpLexer.g new file mode 100644 index 0000000..b4f7784 --- /dev/null +++ b/doc/grammars/pascal/wpLexer.g @@ -0,0 +1,237 @@
unit wpLexer;

lexer TwpLexer;
options
{
    exportVocab=wpLexer;
    caseSensitive=false;
    testLiterals=false;
    k=2;
}

tokens
{
    "do";
    "if";
    "in";
    "of";
    "or";
    "to";

    "and";
    "div";
    "end";
    "for";
    "mod";
    "nil";
    "not";
    "set";
    "var";
    "xor";

    "case";
    "else";
    "file";
    "goto";
    "then";
    "type";
    "uses";
    "with";

    "array";
    "begin";
    "const";
    "label";
    "until";
    "while";

    "downto";
    "packed";
    "record";
    "repeat";

    "program";
    "function";
    "procedure";

    STRING;
    CHAR;
}

// ============================================================================
// Simple tokens
// ============================================================================
LPAREN      : '(';
RPAREN      : ')';

LBRACKET    : '[';
RBRACKET    : ']';

COMMA       : ',';
COLON       : ':';
SEMI        : ';';

DOT         : '.';
RANGE       : "..";

ASSIGN      : ":=";

EQ          : '=';
GT          : '>';
LT          : '<';
GE          : ">=";
LE          : "<=";
NE          : "<>";

PLUS        : '+';
MINUS       : '-';

STAR        : '*';
SLASH       : '/';

PTR         : '^';

// ============================================================================
// Identifier
// Matched text is checked against the literals table (keywords).
// ============================================================================
ID
options
{
    testLiterals=true;
}
    : LETTER (LETTER | DIGIT)* ;

// ============================================================================
// Int or real
// The predicates keep "1..5" as UINT RANGE UINT instead of starting a real.
// ============================================================================
UINT_OR_REAL
    : (UINT RANGE)      => UINT     { _ttype := TT_UINT;  }
    | (UINT DOT)        => UREAL    { _ttype := TT_UREAL; }
    | (UINT ('E'|'e'))  => UREAL    { _ttype := TT_UREAL; }
    | UINT                          { _ttype := TT_UINT;  }
    ;

// ============================================================================
// Protected rules
// ============================================================================
protected
LETTER      : 'a'..'z' | 'A'..'Z' | '_';

// ============================================================================
// Int
// ============================================================================
protected
UINT
    : (DIGIT)+
    ;

// ============================================================================
// Real
// ============================================================================
protected
UREAL
    : UINT ('.' UINT)? ( ('e' | 'E') ('+'|'-')? UINT)?
    ;

// ============================================================================
// Digit
// ============================================================================
protected
DIGIT
    : '0'..'9'
    ;

// ============================================================================
// String or char
// The token text includes the surrounding quotes: '' (empty) is a STRING,
// '''' (one escaped quote) is a CHAR, any other text longer than three
// characters is a STRING, the rest is a CHAR.
// ============================================================================
STRING_OR_CHAR
    : '\'' (~'\'' | '\'' '\'')* '\''
    {
        if      TokenText = ''''''      then _ttype := TT_STRING
        else if TokenText = ''''''''''  then _ttype := TT_CHAR
        else if Length( TokenText) > 3  then _ttype := TT_STRING
        else                                 _ttype := TT_CHAR;
    }
    ;

// ============================================================================
// Single line comment
// NOTE(review): the terminating line break is mandatory, so a comment on the
// last line of a file without a trailing newline is rejected.
// ============================================================================
SLCOMMENT
    :
    "//"
    ( ~( '\r' | '\n') )*
    (
        '\r' '\n'   { newLine; }
    |   '\r'        { newLine; }
    |   '\n'        { newLine; }
    )
    {
        _ttype := TT_SKIP;
    }
    ;

// ============================================================================
// Multi line comment version 1
// Nested comments aren't allowed!
// ============================================================================
MLCOMMENT1
    :
    "(*"
    (
        options
        {
            greedy = false;
            generateAmbigWarnings = false;
        }
    :   '\r' '\n'   { newLine; }
    |   '\r'        { newLine; }
    |   '\n'        { newLine; }
    |   .
    )*
    "*)"
    {
        _ttype := TT_SKIP;
    }
    ;

// ============================================================================
// Multi line comment version 2
// Nested comments aren't allowed!
// ============================================================================
MLCOMMENT2
    :
    "{"
    (
        options
        {
            greedy = false;
            generateAmbigWarnings = false;
        }
    :   '\r' '\n'   { newLine; }
    |   '\r'        { newLine; }
    |   '\n'        { newLine; }
    |   .
    )*
    "}"
    {
        _ttype := TT_SKIP;
    }
    ;

// ============================================================================
// White space
// ============================================================================
WS
    :
    (
        '\r' '\n'   { newLine; }
    |   '\r'        { newLine; }
    |   '\n'        { newLine; }
    |   '\t'        { tab; }
    |   ' '
    )
    {
        _ttype := TT_SKIP;
    }
    ;
diff --git a/doc/grammars/pascal/wpParser.g b/doc/grammars/pascal/wpParser.g new file mode 100644 index 0000000..4e2fa70 --- /dev/null +++ b/doc/grammars/pascal/wpParser.g @@ -0,0 +1,368 @@
unit wpParser;

parser TwpParser;
options
{
    importVocab = wpLexer;
    k = 2;
}

// ============================================================================
// prog
// The identifiers of the program heading are separated by commas:
//     program demo(input, output);
// ============================================================================
prog
    : "program" id (LPAREN id (COMMA id)* RPAREN)? SEMI block DOT
    ;

// ============================================================================
// block
// ============================================================================
block
    : declarations compoundStmt
    ;

// ============================================================================
// declarations
// ============================================================================
declarations
    :
    ( "label" UINT (COMMA UINT)* SEMI )?
    ( "const" (id EQ constant SEMI)+ )?
    ( "type"  (id EQ typeSpec SEMI)+ )?
    ( "var"   (id (COMMA id)* COLON typeSpec SEMI)+ )?

    (
        "procedure" id parameterList SEMI block SEMI
    |   "function"  id parameterList COLON id SEMI block SEMI
    )*
    ;

// ============================================================================
// statement
// Optional numeric label followed by an optional statement (the empty
// statement is allowed).
// ============================================================================
statement
    : (UINT COLON)?
    (
        (variable ASSIGN) => assignmentStmt
    |   procedureCall
    |   compoundStmt
    |   ifStmt
    |   caseStmt
    |   whileStmt
    |   repeatStmt
    |   forStmt
    |   withStmt
    |   gotoStmt
    )?
    ;

// ============================================================================
// assignmentStmt
// ============================================================================
assignmentStmt
    : variable ASSIGN expression
    ;

// ============================================================================
// procedureCall
// ============================================================================
procedureCall
    : id (LPAREN expression (widthSpec)? (COMMA expression (widthSpec)? )* RPAREN)?
    ;

// ============================================================================
// widthSpec
// ============================================================================
widthSpec
    : (COLON UINT) (COLON UINT)?
    ;


// ============================================================================
// compoundStmt
// ============================================================================
compoundStmt
    : "begin" (statement (SEMI statement)*)? "end"
    ;

// ============================================================================
// ifStmt
// The predicate binds an "else" to the nearest "if".
// ============================================================================
ifStmt
    : "if" expression "then" statement
    (
        ("else") => "else" statement
    |
    )
    ;

// ============================================================================
// caseStmt
// ============================================================================
caseStmt
    : "case" expression "of"
        ( caseStmtItem (SEMI caseStmtItem)* )?
      "end"
    ;

// ============================================================================
// caseStmtItem
// ============================================================================
caseStmtItem
    : constant (COMMA constant)* COLON statement
    ;

// ============================================================================
// whileStmt
// ============================================================================
whileStmt
    : "while" expression "do" statement
    ;

// ============================================================================
// repeatStmt
// ============================================================================
repeatStmt
    : "repeat" (statement (SEMI statement)*)? "until" expression
    ;

// ============================================================================
// forStmt
// ============================================================================
forStmt
    : "for" id ASSIGN expression ("to" | "downto") expression "do" statement
    ;

// ============================================================================
// withStmt
// ============================================================================
withStmt
    : "with" variable (COMMA variable)* "do" statement
    ;

// ============================================================================
// gotoStmt
// ============================================================================
gotoStmt
    : "goto" UINT
    ;


// ============================================================================
// parameterList
// ============================================================================
parameterList
    : (
        LPAREN
            parameter (SEMI parameter)*
        RPAREN
      )?
    ;

// ============================================================================
// parameter
// ============================================================================
parameter
    : ("var" | "function")? id (COMMA id)* COLON typeId
    | "procedure" id (COMMA id)*
    ;


// ============================================================================
// expression
// ============================================================================
expression
    : simpleExpression (relOp simpleExpression)*
    ;

// ============================================================================
// simpleExpression
// ============================================================================
simpleExpression
    : (PLUS|MINUS)? term (addOp term)*
    ;

// ============================================================================
// term
// ============================================================================
term
    : factor (mulOp factor)*
    ;

// ============================================================================
// factor
// ============================================================================
factor
    : uNumber
    | "nil"
    | CHAR
    | STRING
    | (id LPAREN) => procedureCall
    | variable
    | LPAREN expression RPAREN
    | "not" factor
    | LBRACKET (expression (RANGE expression)? (COMMA expression (RANGE expression)? )* )? RBRACKET
    ;

// ============================================================================
// variable
// ============================================================================
variable
    : variableId
    (
        LBRACKET expression (COMMA expression)* RBRACKET
    |   DOT fieldId
    |   PTR
    )*
    ;

// ============================================================================
// fieldList
// Fields are separated by semicolons.  A trailing semicolon is accepted, the
// variant part is optional, and a record may consist of a variant part only.
// With k = 2 the token after SEMI (id / "case" / anything else) selects the
// alternative.
// ============================================================================
fieldList
    : simpleFieldList (SEMI simpleFieldList)* (SEMI (variantFieldList)?)?
    | variantFieldList
    |
    ;

// ============================================================================
// simpleFieldList
// ============================================================================
simpleFieldList
    : id (COMMA id)* COLON typeSpec
    ;

// ============================================================================
// variantFieldList
// ============================================================================
variantFieldList
    : "case" (id COLON)? typeId "of"
        constant (COMMA constant)* COLON LPAREN fieldList RPAREN
        (SEMI constant (COMMA constant)* COLON LPAREN fieldList RPAREN)*
    ;

// ============================================================================
// typeSpecification
// ============================================================================
typeSpec
    : simpleType
    | PTR typeId
    | ("packed")?
    (
        "array" LBRACKET simpleType (COMMA simpleType)* RBRACKET "of" typeSpec
    |   "file" "of" typeSpec
    |   "set" "of" simpleType
    |   "record" fieldList "end"
    )
    ;

// ============================================================================
// simpleType
// ============================================================================
simpleType
    : (constant RANGE) => constant RANGE constant
    | typeId
    | LPAREN id (COMMA id)* RPAREN
    ;

// ============================================================================
// constant
// ============================================================================
constant
    : (PLUS | MINUS)? (constantId | uNumber)
    | CHAR
    | STRING
    ;

// ============================================================================
// unsignedConstant
// ============================================================================
uConstant
    : constantId
    | uNumber
    | "nil"
    | CHAR
    | STRING
    ;

// ============================================================================
// unumber
// ============================================================================
uNumber
    : UINT
    | UREAL
    ;

// ============================================================================
// uint
// ============================================================================
uInt
    : UINT;

// ============================================================================
// fieldId
// ============================================================================
fieldId
    : id
    ;

// ============================================================================
// variableId
// ============================================================================
variableId
    : id
    ;

// ============================================================================
// typeId
// ============================================================================
typeId
    : id
    ;

// ============================================================================
// constantId
// ============================================================================
constantId
    : id
    ;

// ============================================================================
// id
// ============================================================================
id
    : ID
    ;

// ============================================================================
// relOp
// ============================================================================
relOp
    : GT
    | LT
    | GE
    | LE
    | NE
    | EQ
    | "in"
    ;

// ============================================================================
// addOp
// ============================================================================
addOp
    : PLUS
    | MINUS
    | "or"
    | "xor"
    ;

// ============================================================================
// mulOp
// ============================================================================
mulOp
    : STAR
    | SLASH
    | "div"
    | "mod"
    | "and"
    ;


diff --git a/doc/tutorial/calc/calc.dpr b/doc/tutorial/calc/calc.dpr new file mode 100644 index 0000000..0c90e80 --- /dev/null +++ b/doc/tutorial/calc/calc.dpr @@ -0,0 +1,36 @@
// ============================================================================
// Console driver for the calculator tutorial: evaluates the expressions in
// the file named on the command line.
// ============================================================================
program calc;

{$APPTYPE CONSOLE}

uses
  Classes,
  SysUtils,
  calcLexer in 'calcLexer.pas',
  calcParser in 'calcParser.pas';

var
  stm: TFileStream;
  lex: TcalcLexer;
  par: TcalcParser;

begin
  if ParamCount <> 1 then
  begin
    writeln('usage: calc ');
    exit;
  end;

  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      lex := TcalcLexer.Create(stm);
      par := TcalcParser.Create(lex);

      par.calc;
    except
      // Report the failure instead of exiting silently.
      on e: Exception do writeln('Error: ' + e.Message);
    end;
  finally
    // stm and par are unit-level variables and therefore start out as nil,
    // so Free is safe even when construction failed part-way.
    par.Free;
    stm.Free;
  end;
end.
diff --git a/doc/tutorial/calc/calc.txt b/doc/tutorial/calc/calc.txt new file mode 100644 index 0000000..fb388c7 --- /dev/null +++ b/doc/tutorial/calc/calc.txt @@ -0,0 +1,5 @@ +1+2+3+4+5+6+7+8+9; +(((((2+3))))); +(-1*(-2*(-3*(-4+ -5)))); +(-1*(-2*(-3*(-4+ 5)))); +7 * -(-9); \ No newline at end of file diff --git a/doc/tutorial/calc/calcLexer.g b/doc/tutorial/calc/calcLexer.g new file mode 100644 index 0000000..bfc7a1c --- /dev/null +++ b/doc/tutorial/calc/calcLexer.g @@ -0,0 +1,49 @@ +// ============================================================================ +// Demo lexer for four operator calculator +// ============================================================================ +unit calcLexer; + +lexer TcalcLexer; +options +{ + exportVocab = calcLexer; +} + +// ============================================================================ +// Simple tokens +// ============================================================================ +LPAREN : '('; +RPAREN : ')'; + +PLUS : '+'; +MINUS : '-'; +STAR : '*'; +SLASH : '/'; + +SEMI : ';'; + +// ============================================================================ +// INT +// ============================================================================ +INT : ('0'..'9')+; + +// ============================================================================ +// White space +// ============================================================================ +WS + : + ( + options + { + generateAmbigWarnings = false; + } + : '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + | '\t' { tab; } + | ' ' + ) + { + _ttype := TT_SKIP; + } + ; diff --git a/doc/tutorial/calc/calcLexer.pas b/doc/tutorial/calc/calcLexer.pas new file mode 100644 index 0000000..c292243 --- /dev/null +++ b/doc/tutorial/calc/calcLexer.pas @@ -0,0 +1,430 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. 
+// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0d +// Grammar: calcLexer +// ============================================================================ +unit calcLexer; + +interface + +uses + calcLexerTokens, + Classes, + Contnrs, + dpgLexer, + dpgToken, + dpgTypes, + SysUtils; + +type + // ========================================================================= + // Class TcalcLexer declaration + // ========================================================================= + TcalcLexer = class( TdpgLexer) + + protected // Public grammar rules ("rescoped") + procedure mLPAREN ( pCreate: boolean); + procedure mRPAREN ( pCreate: boolean); + procedure mPLUS ( pCreate: boolean); + procedure mMINUS ( pCreate: boolean); + procedure mSTAR ( pCreate: boolean); + procedure mSLASH ( pCreate: boolean); + procedure mSEMI ( pCreate: boolean); + procedure mINT ( pCreate: boolean); + procedure mWS ( pCreate: boolean); + + public + function NextToken: IdpgToken; override; + end; + +implementation +uses + dpgException, + dpgExceptionSemantic, + dpgExceptionMismatchedChar; + +// ============================================================================ +// mLPAREN +// ============================================================================ +procedure TcalcLexer.mLPAREN( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_LPAREN; + + match('('); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mRPAREN +// ============================================================================ +procedure TcalcLexer.mRPAREN( pCreate: boolean); +var + _begin: integer; + _save: 
integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_RPAREN; + + match(')'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mPLUS +// ============================================================================ +procedure TcalcLexer.mPLUS( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_PLUS; + + match('+'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mMINUS +// ============================================================================ +procedure TcalcLexer.mMINUS( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_MINUS; + + match('-'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mSTAR +// ============================================================================ +procedure TcalcLexer.mSTAR( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_STAR; + + match('*'); + + if (_ttype <> TT_SKIP) and (pCreate = true) 
then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mSLASH +// ============================================================================ +procedure TcalcLexer.mSLASH( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_SLASH; + + match('/'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mSEMI +// ============================================================================ +procedure TcalcLexer.mSEMI( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_SEMI; + + match(';'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mINT +// ============================================================================ +procedure TcalcLexer.mINT( pCreate: boolean); +var + _begin: integer; + _cnt_10: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_INT; + + _cnt_10 := 0; + + while(true) do + begin + if (( LA(1) in ['0'..'9'])) then + begin + match( ['0'..'9']); + end + + else + begin + if _cnt_10 >= 1 then + break + else + Raise EdpgMismatchedChar.Create( LA(1), ['0'..'9'], FileName, Line, 
Column); + end; + + INC(_cnt_10); + end; + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mWS - consumes one whitespace unit (CR LF, CR, LF, TAB or space); always skipped +// ============================================================================ +procedure TcalcLexer.mWS( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_WS; + + if (( LA(1) in [#13]) and (LA(2) in [#10])) then // CR LF pair: second lookahead char keeps the lone-CR branch reachable + begin + match(#13); + match(#10); + newLine; + end + + else if (( LA(1) in [#13])) then // lone CR + begin + match(#13); + newLine; + end + + else if (( LA(1) in [#10])) then // lone LF + begin + match(#10); + newLine; + end + + else if (( LA(1) in [#9])) then + begin + match(#9); + tab; + end + + else if (( LA(1) in [' '])) then + begin + match(' '); + end + + else + Raise EdpgMismatchedChar.Create( LA(1), [#9..#10,#13,' '], FileName, Line, Column); + _ttype := TT_SKIP; // forces the block below to be skipped, so fReturnToken ends up nil + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ---------------------------------------------------------------------------- +// NextToken +// ---------------------------------------------------------------------------- +function TcalcLexer.NextToken : IdpgToken; +var + _first : TdpgCharSet; + +begin + _first := [#9..#10,#13,' ','('..'+','-','/'..'9',';']; + + while( true) do + begin + ResetText; + + try + if (( LA(1) in ['('])) then + begin + mLPAREN(true); + result := fReturnToken; + end + + else if (( LA(1) in [')'])) then + begin + mRPAREN(true); + result := fReturnToken; + end + + else if (( LA(1) in ['+'])) then + begin + mPLUS(true); + result := fReturnToken; + end + + else if (( LA(1) in ['-'])) then + begin + 
mMINUS(true); + result := fReturnToken; + end + + else if (( LA(1) in ['*'])) then + begin + mSTAR(true); + result := fReturnToken; + end + + else if (( LA(1) in ['/'])) then + begin + mSLASH(true); + result := fReturnToken; + end + + else if (( LA(1) in [';'])) then + begin + mSEMI(true); + result := fReturnToken; + end + + else if (( LA(1) in ['0'..'9'])) then + begin + mINT(true); + result := fReturnToken; + end + + else if (( LA(1) in [#9..#10,#13,' '])) then + begin + mWS(true); + result := fReturnToken; + end + + else + begin + if LA(1) = EOF_CHAR then + begin + uponEof; + result := TdpgToken.Create(TT_EOF); + end + + else + Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column); + end; + + // -------------------------------------------------------------- + // If we found a SKIP token, then try again... + // -------------------------------------------------------------- + if result = nil then + continue; + + // -------------------------------------------------------------- + // Now we have a valid token, so exit the function + // -------------------------------------------------------------- + break; + + except + Raise; + end; + end; +end; + +end. diff --git a/doc/tutorial/calc/calcLexerTokens.pas b/doc/tutorial/calc/calcLexerTokens.pas new file mode 100644 index 0000000..3ba0e97 --- /dev/null +++ b/doc/tutorial/calc/calcLexerTokens.pas @@ -0,0 +1,24 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0d +// Grammar: calcLexer +// ============================================================================ +unit calcLexerTokens; + +interface + +const + TT_EOF = 1; + TT_LPAREN = 4; + TT_RPAREN = 5; + TT_PLUS = 6; + TT_MINUS = 7; + TT_STAR = 8; + TT_SLASH = 9; + TT_SEMI = 10; + TT_INT = 11; + TT_WS = 12; + +implementation +end. 
diff --git a/doc/tutorial/calc/calcLexerTokens.txt b/doc/tutorial/calc/calcLexerTokens.txt new file mode 100644 index 0000000..e1bf466 --- /dev/null +++ b/doc/tutorial/calc/calcLexerTokens.txt @@ -0,0 +1,12 @@ +// $Delphi Parser Generator: calcLexer -> calcLexerTokens.txt$ +TcalcLexer +TT_EOF=1 +TT_LPAREN=4 +TT_RPAREN=5 +TT_PLUS=6 +TT_MINUS=7 +TT_STAR=8 +TT_SLASH=9 +TT_SEMI=10 +TT_INT=11 +TT_WS=12 diff --git a/doc/tutorial/calc/calcParser.g b/doc/tutorial/calc/calcParser.g new file mode 100644 index 0000000..ed1db46 --- /dev/null +++ b/doc/tutorial/calc/calcParser.g @@ -0,0 +1,93 @@ +// ============================================================================ +// Demo parser for four operator calculator +// ============================================================================ +unit calcParser; + +parser TcalcParser; +options +{ + importVocab = calcLexer; + exportVocab = calcParser; +// k = 2; +} + +// ============================================================================ +// calc +// ============================================================================ +calc +local +{ + v: integer; +} + : (v=expression SEMI {writeln(v);} )+ + ; + + +// ============================================================================ +// expression +// ============================================================================ +expression returns [integer] + : result=simpleExpression + ; + +// ============================================================================ +// simpleExpression +// ============================================================================ +simpleExpression returns [integer] +local +{ + v : integer; +} + : result=term + ( + PLUS v=term { result := result + v; } + | MINUS v=term { result := result - v; } + )* + ; + +// ============================================================================ +// term +// ============================================================================ +term returns [integer] +local +{ + v : integer; +} + 
: result=factor + ( + STAR v=factor { result := result * v; } + | SLASH v=factor { result := result div v; } + )* + ; + +// ============================================================================ +// factor +// ============================================================================ +factor returns [integer] +local +{ + s: integer; +} +{ + s := 1; +} + : + ( + PLUS { s := 1; } + | MINUS { s := -1; } + )? + ( + result=uInt + | LPAREN result=expression RPAREN + ) + { + result := s * result; + } + ; + +// ============================================================================ +// uInt +// ============================================================================ +uInt returns [integer] + : x:INT { result := StrToInt( x.TokenText); } + ; diff --git a/doc/tutorial/calc/calcParser.pas b/doc/tutorial/calc/calcParser.pas new file mode 100644 index 0000000..f6c5990 --- /dev/null +++ b/doc/tutorial/calc/calcParser.pas @@ -0,0 +1,203 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. 
+// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0d +// Grammar: calcParser +// ============================================================================ +unit calcParser; + +interface + +uses + calcParserTokens, + Classes, + Contnrs, + dpgLLkParser, + dpgToken, + dpgTypes, + SysUtils; + +type + // ========================================================================= + // Class TcalcParser declaration + // ========================================================================= + TcalcParser = class( TdpgLLkParser) + + public // Public grammar rules; value rules return the integer they computed + procedure calc ; + function expression : integer; + function simpleExpression : integer; + function term : integer; + function factor : integer; + function uInt : integer; + + end; + +implementation +uses + dpgException, + dpgExceptionSemantic, + dpgExceptionMismatchedToken; + +// ============================================================================ +// calc - parses one or more "expression SEMI" statements and prints each value +// ============================================================================ +procedure TcalcParser.calc; +var + _cnt_16: integer; + v: integer; + +begin + + _cnt_16 := 0; + + while(true) do + begin + if (( LA(1) in [TT_LPAREN,TT_PLUS..TT_MINUS,TT_INT])) then + begin + v := expression; // capture the rule's value so writeln prints it + match(TT_SEMI); + writeln(v); + end + + else + begin + if _cnt_16 >= 1 then // at least one statement was parsed: normal loop exit + break + else + Raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_PLUS..TT_MINUS,TT_INT], FileName); + end; + + INC(_cnt_16); + end; +end; + +// ============================================================================ +// expression - returns the value of the contained simpleExpression +// ============================================================================ +function TcalcParser.expression: integer; +begin + + result := simpleExpression; +end; + +// ============================================================================ +// simpleExpression - term ((PLUS | MINUS) term)*, folded left to right +// ============================================================================ +function TcalcParser.simpleExpression: integer; +var + v : integer; + +begin + + result := term; + + while(true) do + begin 
+ if (( LA(1) in [TT_PLUS])) then + begin + match(TT_PLUS); + v := term; + result := result + v; + end + + else if (( LA(1) in [TT_MINUS])) then + begin + match(TT_MINUS); + v := term; + result := result - v; + end + + else + break; + end; + +end; + +// ============================================================================ +// term - factor ((STAR | SLASH) factor)*, folded left to right; SLASH is integer div +// ============================================================================ +function TcalcParser.term: integer; +var + v : integer; + +begin + + result := factor; + + while(true) do + begin + if (( LA(1) in [TT_STAR])) then + begin + match(TT_STAR); + v := factor; + result := result * v; + end + + else if (( LA(1) in [TT_SLASH])) then + begin + match(TT_SLASH); + v := factor; + result := result div v; + end + + else + break; + end; + +end; + +// ============================================================================ +// factor - optional sign followed by an integer or a parenthesised expression +// ============================================================================ +function TcalcParser.factor: integer; +var + s: integer; + +begin + + s := 1; + + if (( LA(1) in [TT_PLUS])) then + begin + match(TT_PLUS); + s := 1; + end + + else if (( LA(1) in [TT_MINUS])) then + begin + match(TT_MINUS); + s := -1; + end; + if (( LA(1) in [TT_INT])) then + begin + result := uInt; + end + + else if (( LA(1) in [TT_LPAREN])) then + begin + match(TT_LPAREN); + result := expression; + match(TT_RPAREN); + end + + else + Raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_INT], FileName); + result := s * result; // apply the sign collected above +end; + +// ============================================================================ +// uInt - converts the text of one INT token to an integer +// ============================================================================ +function TcalcParser.uInt: integer; +var + x: IdpgToken; + +begin + + x := LT(1); // keep the token before match() consumes it + match(TT_INT); + result := StrToInt( x.TokenText); +end; + +end. 
diff --git a/doc/tutorial/calc/calcParserTokens.pas b/doc/tutorial/calc/calcParserTokens.pas new file mode 100644 index 0000000..e62c38c --- /dev/null +++ b/doc/tutorial/calc/calcParserTokens.pas @@ -0,0 +1,24 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0d +// Grammar: calcParser +// ============================================================================ +unit calcParserTokens; + +interface + +const + TT_EOF = 1; + TT_LPAREN = 4; + TT_RPAREN = 5; + TT_PLUS = 6; + TT_MINUS = 7; + TT_STAR = 8; + TT_SLASH = 9; + TT_SEMI = 10; + TT_INT = 11; + TT_WS = 12; + +implementation +end. diff --git a/doc/tutorial/calc/calcParserTokens.txt b/doc/tutorial/calc/calcParserTokens.txt new file mode 100644 index 0000000..c791ee4 --- /dev/null +++ b/doc/tutorial/calc/calcParserTokens.txt @@ -0,0 +1,12 @@ +// $Delphi Parser Generator: calcParser -> calcParserTokens.txt$ +TcalcParser +TT_EOF=1 +TT_LPAREN=4 +TT_RPAREN=5 +TT_PLUS=6 +TT_MINUS=7 +TT_STAR=8 +TT_SLASH=9 +TT_SEMI=10 +TT_INT=11 +TT_WS=12 diff --git a/doc/tutorial/calc/readme.txt b/doc/tutorial/calc/readme.txt new file mode 100644 index 0000000..e488282 --- /dev/null +++ b/doc/tutorial/calc/readme.txt @@ -0,0 +1,11 @@ +To build the demo project you must first compile the grammars. +This grammar CANNOT be compiled with the demo version of dpg. + +1. dpg calcLexer.g +2. dpg calcParser.g + +After compilation the project can be opened in Delphi. Be sure that the dpg +runtime library is in the Delphi library path (in the project settings +or in the environment settings). + +Have fun... 
\ No newline at end of file diff --git a/doc/tutorial/hoc/hoc 1/hocLexer.g b/doc/tutorial/hoc/hoc 1/hocLexer.g new file mode 100644 index 0000000..b36fd77 --- /dev/null +++ b/doc/tutorial/hoc/hoc 1/hocLexer.g @@ -0,0 +1,91 @@ +unit hocLexer; + +// ============================================================================ +// Lexer class declaration +// ============================================================================ +lexer ThocLexer; + +// ---------------------------------------------------------------------------- +// Lexer options +// ---------------------------------------------------------------------------- +options +{ + k = 2; + exportVocab = hocLexer; + caseSensitive = false; +} + +// ============================================================================ +// Begin rule definitions +// +// Remember: All lexer rule names must begin with UPPERCASE letter! +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Simple tokens +// ---------------------------------------------------------------------------- +LPAREN : '('; +RPAREN : ')'; + +PLUS : '+'; +MINUS : '-'; +STAR : '*'; +SLASH : '/'; + +// ---------------------------------------------------------------------------- +// NUMBER +// ---------------------------------------------------------------------------- +UNUMBER + : UINT ('.' UINT)? 
+ ; + +// ---------------------------------------------------------------------------- +// UINT +// ---------------------------------------------------------------------------- +protected +UINT + : (DIGIT)+ + ; + +// ---------------------------------------------------------------------------- +// DIGIT +// ---------------------------------------------------------------------------- +protected +DIGIT + : '0'..'9' + ; + +// ---------------------------------------------------------------------------- +// NEWLINE +// ---------------------------------------------------------------------------- +NEWLINE + : + ( + options + { + generateAmbigWarnings = false; + } + : '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + ) + ; + +// ---------------------------------------------------------------------------- +// WHITESPACE +// ---------------------------------------------------------------------------- +WHITESPACE + : + ( + ' ' + | '\t' { tab; } + ) + { + _ttype := TT_SKIP; + } + ; + +// ============================================================================ +// End rule definitions +// ============================================================================ + diff --git a/doc/tutorial/hoc/hoc 1/hocLexer.pas b/doc/tutorial/hoc/hoc 1/hocLexer.pas new file mode 100644 index 0000000..960398b --- /dev/null +++ b/doc/tutorial/hoc/hoc 1/hocLexer.pas @@ -0,0 +1,505 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. 
+// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0r +// Grammar: hocLexer +// ============================================================================ +unit hocLexer; + +interface + +uses + Classes, + Contnrs, + dpgLexer, + dpgToken, + dpgTypes, + hocLexerTokens, + SysUtils; + +type + // ========================================================================= + // Class ThocLexer declaration + // ========================================================================= + ThocLexer = class( TdpgLexer) + + protected // Internals + procedure initialize; override; + + protected // Protected grammar rules + procedure mUINT ( pCreate: boolean); + procedure mDIGIT ( pCreate: boolean); + + protected // Public grammar rules ("rescoped") + procedure mLPAREN ( pCreate: boolean); + procedure mRPAREN ( pCreate: boolean); + procedure mPLUS ( pCreate: boolean); + procedure mMINUS ( pCreate: boolean); + procedure mSTAR ( pCreate: boolean); + procedure mSLASH ( pCreate: boolean); + procedure mUNUMBER ( pCreate: boolean); + procedure mNEWLINE ( pCreate: boolean); + procedure mWHITESPACE ( pCreate: boolean); + + public + function NextToken: IdpgToken; override; + end; + +implementation +uses + dpgException, + dpgExceptionSemantic, + dpgExceptionMismatchedChar; + +// ============================================================================ +// mLPAREN +// ============================================================================ +procedure ThocLexer.mLPAREN( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_LPAREN; + + match('('); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// 
============================================================================ +// mRPAREN +// ============================================================================ +procedure ThocLexer.mRPAREN( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_RPAREN; + + match(')'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mPLUS +// ============================================================================ +procedure ThocLexer.mPLUS( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_PLUS; + + match('+'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mMINUS +// ============================================================================ +procedure ThocLexer.mMINUS( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_MINUS; + + match('-'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mSTAR +// ============================================================================ +procedure 
ThocLexer.mSTAR( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_STAR; + + match('*'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mSLASH +// ============================================================================ +procedure ThocLexer.mSLASH( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_SLASH; + + match('/'); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mUNUMBER +// ============================================================================ +procedure ThocLexer.mUNUMBER( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_UNUMBER; + + mUINT(false); + if (( LA(1) in ['.'])) then + begin + match('.'); + mUINT(false); + end; + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mUINT +// ============================================================================ +procedure ThocLexer.mUINT( pCreate: boolean); +var + _begin: integer; + _cnt_40: integer; + _save: integer; + _token: 
IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_UINT; + + _cnt_40 := 0; + + while(true) do + begin + if (( LA(1) in ['0'..'9'])) then + begin + mDIGIT(false); + end + + else + begin + if _cnt_40 >= 1 then + break + else + Raise EdpgMismatchedChar.Create( LA(1), ['0'..'9'], FileName, Line, Column); + end; + + INC(_cnt_40); + end; + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mDIGIT +// ============================================================================ +procedure ThocLexer.mDIGIT( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_DIGIT; + + match( ['0'..'9']); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mNEWLINE +// ============================================================================ +procedure ThocLexer.mNEWLINE( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_NEWLINE; + + if (( LA(1) in [#13]) and (LA(2) in [#10])) then + begin + match(#13); + match(#10); + newLine; + end + + else if (( LA(1) in [#13])) then + begin + match(#13); + newLine; + end + + else if (( LA(1) in [#10])) then + begin + match(#10); + newLine; + end + + else + Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column); + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + 
_token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ============================================================================ +// mWHITESPACE +// ============================================================================ +procedure ThocLexer.mWHITESPACE( pCreate: boolean); +var + _begin: integer; + _save: integer; + _token: IdpgToken; + _ttype: integer; + +begin + _begin := Length( fText) +1; + _token := nil; + _ttype := TT_WHITESPACE; + + if (( LA(1) in [' '])) then + begin + match(' '); + end + + else if (( LA(1) in [#9])) then + begin + match(#9); + tab; + end + + else + Raise EdpgMismatchedChar.Create( LA(1), [#9,' '], FileName, Line, Column); + _ttype := TT_SKIP; + + if (_ttype <> TT_SKIP) and (pCreate = true) then + begin + _token := makeToken( _ttype); + _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1); + end; + + fReturnToken := _token; +end; + +// ---------------------------------------------------------------------------- +// NextToken +// ---------------------------------------------------------------------------- +function ThocLexer.NextToken : IdpgToken; +var + _first : TdpgCharSet; + +begin + _first := [#9..#10,#13,' ','('..'+','-','/'..'9']; + + while( true) do + begin + ResetText; + + try + if (( LA(1) in ['('])) then + begin + mLPAREN(true); + result := fReturnToken; + end + + else if (( LA(1) in [')'])) then + begin + mRPAREN(true); + result := fReturnToken; + end + + else if (( LA(1) in ['+'])) then + begin + mPLUS(true); + result := fReturnToken; + end + + else if (( LA(1) in ['-'])) then + begin + mMINUS(true); + result := fReturnToken; + end + + else if (( LA(1) in ['*'])) then + begin + mSTAR(true); + result := fReturnToken; + end + + else if (( LA(1) in ['/'])) then + begin + mSLASH(true); + result := fReturnToken; + end + + else if (( LA(1) in ['0'..'9'])) then + begin + mUNUMBER(true); + result := fReturnToken; + end + + else 
if (( LA(1) in [#10,#13])) then + begin + mNEWLINE(true); + result := fReturnToken; + end + + else if (( LA(1) in [#9,' '])) then + begin + mWHITESPACE(true); + result := fReturnToken; + end + + else + begin + if LA(1) = EOF_CHAR then + begin + uponEof; + result := TdpgToken.Create(TT_EOF); + end + + else + Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column); + end; + + // -------------------------------------------------------------- + // If we found a SKIP token, then try again... + // -------------------------------------------------------------- + if result = nil then + continue; + + // -------------------------------------------------------------- + // Now we have a valid token, so exit the function + // -------------------------------------------------------------- + break; + + except + Raise; + end; + end; +end; + +// ---------------------------------------------------------------------------- +// InitLiterals +// ---------------------------------------------------------------------------- +procedure ThocLexer.initialize; +begin + fCaseSensitive := false; + fLiterals.CaseSensitive := false; + +end; + +end. diff --git a/doc/tutorial/hoc/hoc 1/hocLexerTokens.pas b/doc/tutorial/hoc/hoc 1/hocLexerTokens.pas new file mode 100644 index 0000000..5670b01 --- /dev/null +++ b/doc/tutorial/hoc/hoc 1/hocLexerTokens.pas @@ -0,0 +1,26 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.1.0r +// Grammar: hocLexer +// ============================================================================ +unit hocLexerTokens; + +interface + +const + TT_EOF = 1; + TT_LPAREN = 4; + TT_RPAREN = 5; + TT_PLUS = 6; + TT_MINUS = 7; + TT_STAR = 8; + TT_SLASH = 9; + TT_UNUMBER = 10; + TT_UINT = 11; + TT_DIGIT = 12; + TT_NEWLINE = 13; + TT_WHITESPACE = 14; + +implementation +end. 
diff --git a/doc/tutorial/hoc/hoc 1/hocLexerTokens.txt b/doc/tutorial/hoc/hoc 1/hocLexerTokens.txt new file mode 100644 index 0000000..ba67c9d --- /dev/null +++ b/doc/tutorial/hoc/hoc 1/hocLexerTokens.txt @@ -0,0 +1,14 @@ +// $Delphi Parser Generator: hocLexer -> hocLexerTokens.txt$ +ThocLexer +TT_EOF=1 +TT_LPAREN=4 +TT_RPAREN=5 +TT_PLUS=6 +TT_MINUS=7 +TT_STAR=8 +TT_SLASH=9 +TT_UNUMBER=10 +TT_UINT=11 +TT_DIGIT=12 +TT_NEWLINE=13 +TT_WHITESPACE=14 diff --git a/doc/tutorial/hoc/hoc 1/hocParser.g b/doc/tutorial/hoc/hoc 1/hocParser.g new file mode 100644 index 0000000..987cd76 --- /dev/null +++ b/doc/tutorial/hoc/hoc 1/hocParser.g @@ -0,0 +1,101 @@ +unit hocParser; + +// ============================================================================ +// Parser class declaration +// ============================================================================ +parser ThocParser; + +// ---------------------------------------------------------------------------- +// Parser options +// ---------------------------------------------------------------------------- +options +{ + k = 2; + importVocab = hocLexer; + exportVocab = hocParser; +} + +// ============================================================================ +// Begin rule definitions +// +// Remember: All parser rule names must begin with LOWERCASE letter! +// ============================================================================ + +// ---------------------------------------------------------------------------- +// prog +// ---------------------------------------------------------------------------- +prog +local +{ + val : double; +} + : + ( + ( + val = expression + )? 
+ + NEWLINE + )* + ; + +// ---------------------------------------------------------------------------- +// expression +// ---------------------------------------------------------------------------- +expression returns [double] +local +{ + val : double; +} + : result = term + ( + PLUS val = term { result := result + val; } + | MINUS val = term { result := result - val; } + )* + ; + +// ---------------------------------------------------------------------------- +// term +// ---------------------------------------------------------------------------- +term returns [double] +local +{ + val : double; +} + : result = factor + ( + STAR val = factor { result := result * val; } + | SLASH val = factor { result := result / val; } + )* + ; + +// ---------------------------------------------------------------------------- +// factor +// ---------------------------------------------------------------------------- +factor returns [double] + : result = uNumber + | LPAREN result = expression RPAREN + ; + +// ---------------------------------------------------------------------------- +// uNumber - converts the UNUMBER token text (which may contain a fraction) to a double +// ---------------------------------------------------------------------------- +uNumber returns [double] + : n:UNUMBER + { + result := StrToFloat( n.TokenText); + } + ; + +// ============================================================================ +// End rule definitions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// This section is used for generating member definitions in the unit 'hocParser'. +// The content of the section is verbatim copied into the generated code. 
+// ----------------------------------------------------------------------------
+memberdef
+{
+}
+
diff --git a/doc/tutorial/hoc/hoc 1/hocParser.pas b/doc/tutorial/hoc/hoc 1/hocParser.pas
new file mode 100644
index 0000000..715a208
--- /dev/null
+++ b/doc/tutorial/hoc/hoc 1/hocParser.pas
@@ -0,0 +1,170 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.1.0r
+// Grammar: hocParser
+// ============================================================================
+unit hocParser;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLLkParser,
+  dpgToken,
+  dpgTypes,
+  hocParserTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class ThocParser declaration (one method per grammar rule)
+  // =========================================================================
+  ThocParser = class( TdpgLLkParser)
+
+  public // Public grammar rules
+    procedure prog ;
+    function expression : double;
+    function term : double;
+    function factor : double;
+    function uNumber : double;
+
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedToken;
+
+// ============================================================================
+// prog - zero or more lines, each an optional expression followed by NEWLINE
+// ============================================================================
+procedure ThocParser.prog;
+var
+  val : double; // receives each expression value; not read again in this rule
+
+begin
+
+
+  while(true) do
+  begin
+    if (( LA(1) in [TT_LPAREN,TT_UNUMBER,TT_NEWLINE])) then
+    begin
+      if (( LA(1) in [TT_LPAREN,TT_UNUMBER])) then
+      begin
+        val := expression;
+      end;
+      match(TT_NEWLINE);
+    end
+
+    else
+      break;
+  end;
+
+end;
+
+// ============================================================================
+// expression - term followed by any number of "+ term" / "- term"; returns sum
+// ============================================================================
+function ThocParser.expression: double;
+var
+  val : double;
+
+begin
+
+  result := term;
+
+  while(true) do
+  begin
+    if (( LA(1) in [TT_PLUS])) then
+    begin
+      match(TT_PLUS);
+      val := term;
+      result := result + val;
+    end
+
+    else if (( LA(1) in [TT_MINUS])) then
+    begin
+      match(TT_MINUS);
+      val := term;
+      result := result - val;
+    end
+
+    else
+      break;
+  end;
+
+end;
+
+// ============================================================================
+// term - factor followed by any number of "* factor" / "/ factor"
+// ============================================================================
+function ThocParser.term: double;
+var
+  val : double;
+
+begin
+
+  result := factor;
+
+  while(true) do
+  begin
+    if (( LA(1) in [TT_STAR])) then
+    begin
+      match(TT_STAR);
+      val := factor;
+      result := result * val;
+    end
+
+    else if (( LA(1) in [TT_SLASH])) then
+    begin
+      match(TT_SLASH);
+      val := factor;
+      result := result / val; // no guard here against val = 0
+    end
+
+    else
+      break;
+  end;
+
+end;
+
+// ============================================================================
+// factor - a UNUMBER or a parenthesised expression; anything else is an error
+// ============================================================================
+function ThocParser.factor: double;
+begin
+
+  if (( LA(1) in [TT_UNUMBER])) then
+  begin
+    result := uNumber;
+  end
+
+  else if (( LA(1) in [TT_LPAREN])) then
+  begin
+    match(TT_LPAREN);
+    result := expression;
+    match(TT_RPAREN);
+  end
+
+  else
+    Raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_UNUMBER], FileName);
+end;
+
+// ============================================================================
+// uNumber - matches one UNUMBER token and returns its text as a double
+// ============================================================================
+function ThocParser.uNumber: double;
+var
+  n: IdpgToken;
+
+begin
+
+  n := LT(1); // take the token before match() is called on it
+  match(TT_UNUMBER);
+  result := StrToFloat( n.TokenText); // float parse: the rule returns a double
+end;
+
+end.
diff --git a/doc/tutorial/hoc/hoc 1/hocParserTokens.pas b/doc/tutorial/hoc/hoc 1/hocParserTokens.pas
new file mode 100644
index 0000000..228cdc3
--- /dev/null
+++ b/doc/tutorial/hoc/hoc 1/hocParserTokens.pas
@@ -0,0 +1,26 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.1.0r
+// Grammar: hocParser
+// ============================================================================
+unit hocParserTokens;
+
+interface
+
+const
+  TT_EOF = 1;
+  TT_LPAREN = 4;
+  TT_RPAREN = 5;
+  TT_PLUS = 6;
+  TT_MINUS = 7;
+  TT_STAR = 8;
+  TT_SLASH = 9;
+  TT_UNUMBER = 10;
+  TT_UINT = 11;
+  TT_DIGIT = 12;
+  TT_NEWLINE = 13;
+  TT_WHITESPACE = 14;
+
+implementation
+end.
diff --git a/doc/tutorial/hoc/hoc 1/hocParserTokens.txt b/doc/tutorial/hoc/hoc 1/hocParserTokens.txt
new file mode 100644
index 0000000..f64cf94
--- /dev/null
+++ b/doc/tutorial/hoc/hoc 1/hocParserTokens.txt
@@ -0,0 +1,14 @@
+// $Delphi Parser Generator: hocParser -> hocParserTokens.txt$
+ThocParser
+TT_EOF=1
+TT_LPAREN=4
+TT_RPAREN=5
+TT_PLUS=6
+TT_MINUS=7
+TT_STAR=8
+TT_SLASH=9
+TT_UNUMBER=10
+TT_UINT=11
+TT_DIGIT=12
+TT_NEWLINE=13
+TT_WHITESPACE=14
diff --git a/doc/tutorial/old/filter/filter.g b/doc/tutorial/old/filter/filter.g
new file mode 100644
index 0000000..fcddd47
--- /dev/null
+++ b/doc/tutorial/old/filter/filter.g
@@ -0,0 +1,43 @@
+unit filter;
+
+lexer Tfilter;
+options
+{
+  k = 2;
+  filter = true;
+}
+
+// ----------------------------------------------------------------------------
+// Paragraph - the literal tag "<p>"
+// ----------------------------------------------------------------------------
+P
+  : "<p>"
+  ;
+
+// ----------------------------------------------------------------------------
+// Break - the literal tag "<br>"
+// ----------------------------------------------------------------------------
+BR
+  : "<br>"
+  ;
+
+// ----------------------------------------------------------------------------
+// Newline - CR LF, CR or LF; calls newLine and is skipped (no token)
+// ----------------------------------------------------------------------------
+NEWLINE
+  :
+  (
+    '\r' '\n' { newLine; _ttype := TT_SKIP; }
+  | '\r' { newLine; _ttype := TT_SKIP; }
+  | '\n' { newLine; _ttype := TT_SKIP; }
+  )
+  ;
+
+// ----------------------------------------------------------------------------
+// Tab - calls tab and is skipped (no token)
+// ----------------------------------------------------------------------------
+TAB
+  : '\t' { tab; _ttype := TT_SKIP; }
+  ;
+
+
diff --git a/doc/tutorial/old/filter/filter.pas b/doc/tutorial/old/filter/filter.pas
new file mode 100644
index 0000000..94c5345
--- /dev/null
+++ b/doc/tutorial/old/filter/filter.pas
@@ -0,0 +1,234 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.78r
+// Grammar: filter.g
+// ============================================================================
+unit filter;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTypes,
+  filterTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class Tfilter declaration (one mXXX method per lexer rule)
+  // =========================================================================
+  Tfilter = class( TdpgLexer)
+
+  protected // Public grammar rules ("rescoped")
+    procedure mP ( pCreate: boolean);
+    procedure mBR ( pCreate: boolean);
+    procedure mNEWLINE ( pCreate: boolean);
+    procedure mTAB ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mP - matches the literal '<p>'; builds a TT_P token when pCreate is true
+// ============================================================================
+procedure Tfilter.mP( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_P;
+
+  match('<p>');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mBR - matches the literal '<br>'; builds a TT_BR token when pCreate is true
+// ============================================================================
+procedure Tfilter.mBR( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_BR;
+
+  match('<br>');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mNEWLINE - matches CR LF, CR or LF; always TT_SKIP, so no token is built
+// ============================================================================
+procedure Tfilter.mNEWLINE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_NEWLINE;
+
+  if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+  begin
+    match(#13);
+    match(#10);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else if (( LA(1) in [#13])) then
+  begin
+    match(#13);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else if (( LA(1) in [#10])) then
+  begin
+    match(#10);
+    newLine; _ttype := TT_SKIP;
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mTAB - matches one tab character; always TT_SKIP, so no token is built
+// ============================================================================
+procedure Tfilter.mTAB( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_TAB;
+
+  match(#9);
+  tab; _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken - next non-skipped token; unmatched characters are consumed
+// ----------------------------------------------------------------------------
+function Tfilter.NextToken : IdpgToken;
+begin
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      if (( LA(1) in ['<']) and (LA(2) in ['p'])) then
+      begin
+        mP(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['<']) and (LA(2) in ['b'])) then
+      begin
+        mBR(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#10,#13])) then
+      begin
+        mNEWLINE(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#9])) then
+      begin
+        mTAB(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+        begin
+          consume;
+          continue;
+        end;
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      consume;
+      continue;
+    end;
+  end;
+end;
+
+end.
diff --git a/doc/tutorial/old/filter/filterTokens.pas b/doc/tutorial/old/filter/filterTokens.pas
new file mode 100644
index 0000000..b20bca4
--- /dev/null
+++ b/doc/tutorial/old/filter/filterTokens.pas
@@ -0,0 +1,19 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.78r
+// Grammar: filter.g
+// ============================================================================
+unit filterTokens;
+
+interface
+
+const
+  TT_EOF = 1;
+  TT_P = 4;
+  TT_BR = 5;
+  TT_NEWLINE = 6;
+  TT_TAB = 7;
+
+implementation
+end.
diff --git a/doc/tutorial/old/filter/filterTokens.txt b/doc/tutorial/old/filter/filterTokens.txt
new file mode 100644
index 0000000..a93fffe
--- /dev/null
+++ b/doc/tutorial/old/filter/filterTokens.txt
@@ -0,0 +1,7 @@
+// $Delphi Parser Generator: filter.pas -> TfilterTokens.txt$
+Tfilter
+TT_EOF=1
+TT_P=4
+TT_BR=5
+TT_NEWLINE=6
+TT_TAB=7
diff --git a/doc/tutorial/old/multiLexer/JavaDocLexer.pas b/doc/tutorial/old/multiLexer/JavaDocLexer.pas
new file mode 100644
index 0000000..2652451
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaDocLexer.pas
@@ -0,0 +1,361 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javadoclexer.g
+// ============================================================================
+unit JavaDocLexer;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTokenStreamSelector,
+  dpgTypes,
+  JavaDocTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class TJavaDocLexer declaration (one mXXX method per lexer rule)
+  // =========================================================================
+  TJavaDocLexer = class( TdpgLexer)
+
+  public
+    Selector : IdpgTokenStreamSelector;
+
+  protected // Protected grammar rules
+    procedure mID ( pCreate: boolean);
+
+  protected // Public grammar rules ("rescoped")
+    procedure mPARAM ( pCreate: boolean);
+    procedure mEXCEPTION ( pCreate: boolean);
+    procedure mSTAR ( pCreate: boolean);
+    procedure mJAVADOC_CLOSE ( pCreate: boolean);
+    procedure mNEWLINE ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mPARAM - matches '@param', one or more spaces, then an ID (via mID)
+// ============================================================================
+procedure TJavaDocLexer.mPARAM( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_3: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_PARAM;
+
+  match('@param');
+  _cnt_3 := 0;
+
+  while(true) do
+  begin
+    if (( LA(1) in [' '])) then
+    begin
+      match(' ');
+    end
+
+    else
+    begin
+      if _cnt_3 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);
+    end;
+
+    INC(_cnt_3);
+  end;
+  mID(false);
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mID - matches one or more characters in 'a'..'z'; raises if there is none
+// ============================================================================
+procedure TJavaDocLexer.mID( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_9: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_ID;
+
+  _cnt_9 := 0;
+
+  while(true) do
+  begin
+    if (( LA(1) in ['a'..'z'])) then
+    begin
+      match( ['a'..'z']);
+    end
+
+    else
+    begin
+      if _cnt_9 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
+    end;
+
+    INC(_cnt_9);
+  end;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mEXCEPTION - matches '@exception', one or more spaces, then an ID (via mID)
+// ============================================================================
+procedure TJavaDocLexer.mEXCEPTION( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_6: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_EXCEPTION;
+
+  match('@exception');
+  _cnt_6 := 0;
+
+  while(true) do
+  begin
+    if (( LA(1) in [' '])) then
+    begin
+      match(' ');
+    end
+
+    else
+    begin
+      if _cnt_6 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);
+    end;
+
+    INC(_cnt_6);
+  end;
+  mID(false);
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mSTAR - matches a single '*'; always TT_SKIP, so no token is built
+// ============================================================================
+procedure TJavaDocLexer.mSTAR( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_STAR;
+
+  match('*');
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mJAVADOC_CLOSE - matches '*/' and then calls Selector.Pop
+// ============================================================================
+procedure TJavaDocLexer.mJAVADOC_CLOSE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_JAVADOC_CLOSE;
+
+  match('*/');
+  Selector.Pop;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mNEWLINE - matches CR LF, CR or LF; calls newLine; always TT_SKIP
+// ============================================================================
+procedure TJavaDocLexer.mNEWLINE( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_NEWLINE;
+
+  if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+  begin
+    match(#13);
+    match(#10);
+  end
+
+  else if (( LA(1) in [#13])) then
+  begin
+    match(#13);
+  end
+
+  else if (( LA(1) in [#10])) then
+  begin
+    match(#10);
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+  newLine;
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken - next non-skipped token; unmatched characters are consumed
+// ----------------------------------------------------------------------------
+function TJavaDocLexer.NextToken : IdpgToken;
+begin
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      if (( LA(1) in ['@']) and (LA(2) in ['p'])) then
+      begin
+        mPARAM(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['@']) and (LA(2) in ['e'])) then
+      begin
+        mEXCEPTION(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['*']) and (LA(2) in ['/'])) then
+      begin
+        mJAVADOC_CLOSE(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['*'])) then
+      begin
+        mSTAR(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#10,#13])) then
+      begin
+        mNEWLINE(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+        begin
+          consume;
+          continue;
+        end;
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      consume;
+      continue;
+    end;
+  end;
+end;
+
+end.
diff --git a/doc/tutorial/old/multiLexer/JavaDocTokens.pas b/doc/tutorial/old/multiLexer/JavaDocTokens.pas
new file mode 100644
index 0000000..8aa1885
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaDocTokens.pas
@@ -0,0 +1,21 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javadoclexer.g
+// ============================================================================
+unit JavaDocTokens;
+
+interface
+
+const
+  TT_EOF = 1;
+  TT_PARAM = 4;
+  TT_EXCEPTION = 5;
+  TT_ID = 6;
+  TT_STAR = 7;
+  TT_JAVADOC_CLOSE = 8;
+  TT_NEWLINE = 9;
+
+implementation
+end.
diff --git a/doc/tutorial/old/multiLexer/JavaDocTokens.txt b/doc/tutorial/old/multiLexer/JavaDocTokens.txt
new file mode 100644
index 0000000..10a1496
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaDocTokens.txt
@@ -0,0 +1,9 @@
+// $Delphi Parser Generator: JavaDocLexer.pas -> TJavaDocLexerTokens.txt$
+TJavaDocLexer
+TT_EOF=1
+TT_PARAM=4
+TT_EXCEPTION=5
+TT_ID=6
+TT_STAR=7
+TT_JAVADOC_CLOSE=8
+TT_NEWLINE=9
diff --git a/doc/tutorial/old/multiLexer/JavaLexer.pas b/doc/tutorial/old/multiLexer/JavaLexer.pas
new file mode 100644
index 0000000..206dcd5
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaLexer.pas
@@ -0,0 +1,286 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javalexer.g
+// ============================================================================
+unit JavaLexer;
+
+interface
+
+uses
+  Classes,
+  Contnrs,
+  dpgLexer,
+  dpgToken,
+  dpgTokenStreamSelector,
+  dpgTypes,
+  JavaTokens,
+  SysUtils;
+
+type
+  // =========================================================================
+  // Class TJavaLexer declaration (one mXXX method per lexer rule)
+  // =========================================================================
+  TJavaLexer = class( TdpgLexer)
+
+  public
+    Selector : IdpgTokenStreamSelector;
+
+  protected // Internals
+    procedure initialize; override;
+
+  protected // Public grammar rules ("rescoped")
+    procedure mSEMI ( pCreate: boolean);
+    procedure mJAVADOC_OPEN ( pCreate: boolean);
+    procedure mID ( pCreate: boolean);
+    procedure mWS ( pCreate: boolean);
+
+  public
+    function NextToken: IdpgToken; override;
+  end;
+
+implementation
+uses
+  dpgException,
+  dpgExceptionSemantic,
+  dpgExceptionMismatchedChar;
+
+// ============================================================================
+// mSEMI - matches ';'; builds a TT_SEMI token when pCreate is true
+// ============================================================================
+procedure TJavaLexer.mSEMI( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_SEMI;
+
+  match(';');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mJAVADOC_OPEN - matches '/**' and then calls Selector.Push('docLexer')
+// ============================================================================
+procedure TJavaLexer.mJAVADOC_OPEN( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_JAVADOC_OPEN;
+
+  match('/**');
+  Selector.Push('docLexer');
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mID - matches one or more 'a'..'z'; token type is then passed to testLit
+// ============================================================================
+procedure TJavaLexer.mID( pCreate: boolean);
+var
+  _begin: integer;
+  _cnt_5: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_ID;
+
+  _cnt_5 := 0;
+
+  while(true) do
+  begin
+    if (( LA(1) in ['a'..'z'])) then
+    begin
+      match( ['a'..'z']);
+    end
+
+    else
+    begin
+      if _cnt_5 >= 1 then
+        break
+      else
+        Raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
+    end;
+
+    INC(_cnt_5);
+  end;
+  _ttype := testLit( _ttype);
+
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ============================================================================
+// mWS - matches a space, a tab, or a line end (CR LF, CR, LF); always TT_SKIP
+// ============================================================================
+procedure TJavaLexer.mWS( pCreate: boolean);
+var
+  _begin: integer;
+  _save: integer;
+  _token: IdpgToken;
+  _ttype: integer;
+
+begin
+  _begin := Length( fText) +1;
+  _token := nil;
+  _ttype := TT_WS;
+
+  if (( LA(1) in [' '])) then
+  begin
+    match(' ');
+  end
+
+  else if (( LA(1) in [#9])) then
+  begin
+    match(#9);
+  end
+
+  else if (( LA(1) in [#10,#13])) then
+  begin
+    if (( LA(1) in [#13]) and (LA(2) in [#10])) then
+    begin
+      match(#13);
+      match(#10);
+    end
+
+    else if (( LA(1) in [#13])) then
+    begin
+      match(#13);
+    end
+
+    else if (( LA(1) in [#10])) then
+    begin
+      match(#10);
+    end
+
+    else
+      Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
+    newLine;
+  end
+
+  else
+    Raise EdpgMismatchedChar.Create( LA(1), [#9..#10,#13,' '], FileName, Line, Column);
+  _ttype := TT_SKIP;
+
+  if (_ttype <> TT_SKIP) and (pCreate = true) then
+  begin
+    _token := makeToken( _ttype);
+    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
+  end;
+
+  fReturnToken := _token;
+end;
+
+// ----------------------------------------------------------------------------
+// NextToken - next non-skipped token; raises on a character outside _first
+// ----------------------------------------------------------------------------
+function TJavaLexer.NextToken : IdpgToken;
+var
+  _first : TdpgCharSet;
+
+begin
+  _first := [#9..#10,#13,' ','/',';','a'..'z'];
+
+  while( true) do
+  begin
+    ResetText;
+
+    try
+      if (( LA(1) in [';'])) then
+      begin
+        mSEMI(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['/'])) then
+      begin
+        mJAVADOC_OPEN(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in ['a'..'z'])) then
+      begin
+        mID(true);
+        result := fReturnToken;
+      end
+
+      else if (( LA(1) in [#9..#10,#13,' '])) then
+      begin
+        mWS(true);
+        result := fReturnToken;
+      end
+
+      else
+      begin
+        if LA(1) = EOF_CHAR then
+        begin
+          uponEof;
+          result := TdpgToken.Create(TT_EOF);
+        end
+
+        else
+          Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column);
+      end;
+
+      // --------------------------------------------------------------
+      // If we found a SKIP token, then try again...
+      // --------------------------------------------------------------
+      if result = nil then
+        continue;
+
+      // --------------------------------------------------------------
+      // Now we have a valid token, so exit the function
+      // --------------------------------------------------------------
+      break;
+
+    except
+      Raise;
+    end;
+  end;
+end;
+
+// ----------------------------------------------------------------------------
+// InitLiterals - registers the literal 'int' under its token id
+// ----------------------------------------------------------------------------
+procedure TJavaLexer.initialize;
+begin
+  fLiterals.Add('int', 10); // 10 is LT_int in JavaTokens
+end;
+
+end.
diff --git a/doc/tutorial/old/multiLexer/JavaTokens.pas b/doc/tutorial/old/multiLexer/JavaTokens.pas
new file mode 100644
index 0000000..e4cef1e
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaTokens.pas
@@ -0,0 +1,25 @@
+// ============================================================================
+// This file is generated by the Delphi Parser Generator.
+// ----------------------------------------------------------------------------
+// DPG version: 1.0.0.118r
+// Grammar: javalexer.g
+// ============================================================================
+unit JavaTokens;
+
+interface
+
+const
+  TT_EOF = 1;
+  TT_PARAM = 4;
+  TT_EXCEPTION = 5;
+  TT_ID = 6;
+  TT_STAR = 7;
+  TT_JAVADOC_CLOSE = 8;
+  TT_NEWLINE = 9;
+  LT_int = 10;
+  TT_SEMI = 11;
+  TT_JAVADOC_OPEN = 12;
+  TT_WS = 13;
+
+implementation
+end.
diff --git a/doc/tutorial/old/multiLexer/JavaTokens.txt b/doc/tutorial/old/multiLexer/JavaTokens.txt
new file mode 100644
index 0000000..1965254
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/JavaTokens.txt
@@ -0,0 +1,13 @@
+// $Delphi Parser Generator: JavaLexer.pas -> TJavaLexerTokens.txt$
+TJavaLexer
+TT_EOF=1
+TT_PARAM=4
+TT_EXCEPTION=5
+TT_ID=6
+TT_STAR=7
+TT_JAVADOC_CLOSE=8
+TT_NEWLINE=9
+LT_int="int"=10
+TT_SEMI=11
+TT_JAVADOC_OPEN=12
+TT_WS=13
diff --git a/doc/tutorial/old/multiLexer/demo.dpr b/doc/tutorial/old/multiLexer/demo.dpr
new file mode 100644
index 0000000..ac84a45
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/demo.dpr
@@ -0,0 +1,63 @@
+program demo;
+
+{$APPTYPE CONSOLE}
+
+uses
+  Classes,
+  SysUtils,
+  dpgTokenStreamSelector,
+  javaLexer,
+  javaDocLexer,
+  javaParser;
+
+var
+  stm : TFileStream;
+  lexJava : TJavaLexer;
+  lexJavaDoc : TJavaDocLexer;
+  parJava : TJavaParser;
+  sel : TdpgTokenStreamSelector;
+
+begin
+  // Exactly one argument is expected: the path of the file to parse.
+  if ParamCount <> 1 then
+  begin
+    writeln('usage: demo <file>');
+    exit;
+  end;
+
+  try
+    try
+      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
+      sel := TdpgTokenStreamSelector.Create;
+
+      // Both lexers read the same input: the javadoc lexer is built on the
+      // main lexer's InputState rather than on the stream itself.
+      lexJava := TJavaLexer.Create( stm);
+      lexJavaDoc := TJavaDocLexer.Create( lexJava.InputState);
+
+      // Each lexer gets the selector so its grammar actions can call
+      // Selector.Push / Selector.Pop.
+      lexJava.Selector := sel;
+      lexJavaDoc.Selector := sel;
+
+      // 'docLexer' must match the name used by Selector.Push in javaLexer.g.
+      sel.add( lexJava, 'main');
+      sel.add( lexJavaDoc, 'docLexer');
+      sel.select( 'main');
+
+      // The parser pulls its tokens from the selector, not from a lexer.
+      parJava := TJavaParser.Create( sel);
+
+      parJava.input;
+    except
+      // Say what failed instead of printing a bare 'Exception...'.
+      on e: Exception do
+        writeln('Exception: ', e.Message);
+    end;
+  finally
+    // Same cleanup as calc.dpr: release the stream and the parser. Free is
+    // a no-op on nil, so this is safe when construction failed part-way.
+    stm.Free;
+    parJava.Free;
+  end;
+end.
diff --git a/doc/tutorial/old/multiLexer/javaDocLexer.g b/doc/tutorial/old/multiLexer/javaDocLexer.g
new file mode 100644
index 0000000..1e5ddb7
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/javaDocLexer.g
@@ -0,0 +1,76 @@
+unit JavaDocLexer;
+
+uses
+{
+  dpgTokenStreamSelector;
+}
+
+lexer TJavaDocLexer;
+options
+{
+  k = 2;
+  exportVocab = JavaDoc;
+  filter = true;
+}
+
+memberdecl
+{
+  public
+    Selector : IdpgTokenStreamSelector;
+}
+
+// ----------------------------------------------------------------------------
+// @param: the literal "@param", one or more spaces, then an ID
+// ----------------------------------------------------------------------------
+PARAM
+  : "@param" (' ')+ ID
+  ;
+
+// ----------------------------------------------------------------------------
+// @exception: the literal "@exception", one or more spaces, then an ID
+// ----------------------------------------------------------------------------
+EXCEPTION
+  : "@exception" (' ')+ ID
+  ;
+
+// ----------------------------------------------------------------------------
+// Identifier: one or more lowercase letters; used by PARAM and EXCEPTION
+// ----------------------------------------------------------------------------
+protected ID
+  : ('a'..'z')+
+  ;
+
+// ----------------------------------------------------------------------------
+// Star
+//
+// This rule simply prevents JAVADOC_CLOSE from being called for every '*' in
+// a comment. Calling JAVADOC_CLOSE will fail for simple '*' and cause an
+// exception, which is slow. In other words, the grammar will work without
+// this rule, but is slower.
+// ----------------------------------------------------------------------------
+STAR
+  : '*' { _ttype := TT_SKIP; }
+  ;
+
+// ----------------------------------------------------------------------------
+// JavaDocClose: the literal "*/"; the action then calls Selector.Pop
+// ----------------------------------------------------------------------------
+JAVADOC_CLOSE
+  : "*/" { Selector.Pop; }
+  ;
+
+// ----------------------------------------------------------------------------
+// NewLine: CR LF, CR or LF; calls newLine and is skipped (TT_SKIP)
+// ----------------------------------------------------------------------------
+NEWLINE
+  :
+  (
+    '\r' '\n'
+  | '\r'
+  | '\n'
+  )
+  {
+    newLine;
+    _ttype := TT_SKIP;
+  }
+  ;
\ No newline at end of file
diff --git a/doc/tutorial/old/multiLexer/javaLexer.g b/doc/tutorial/old/multiLexer/javaLexer.g
new file mode 100644
index 0000000..caff66b
--- /dev/null
+++ b/doc/tutorial/old/multiLexer/javaLexer.g
@@ -0,0 +1,68 @@
+unit JavaLexer;
+
+uses
+{
+  dpgTokenStreamSelector;
+}
+
+lexer TJavaLexer;
+options
+{
+  k = 2;
+  importVocab = JavaDoc;
+  exportVocab = Java;
+}
+
+tokens
+{
+  "int";
+}
+
+memberdecl
+{
+  public
+    Selector : IdpgTokenStreamSelector;
+}
+
+// ----------------------------------------------------------------------------
+// Simple tokens: SEMI is the single character ';'
+// ----------------------------------------------------------------------------
+SEMI : ';';
+
+// ----------------------------------------------------------------------------
+// JavaDocOpen: the literal "/**"; the action calls Selector.Push('docLexer')
+// ----------------------------------------------------------------------------
+JAVADOC_OPEN
+  : "/**" { Selector.Push('docLexer'); }
+  ;
+
+// ----------------------------------------------------------------------------
+// Identifier: one or more lowercase letters, with testLiterals switched on
+// ----------------------------------------------------------------------------
+ID
+options
+{
+  testLiterals = true;
+}
+  : ('a'..'z')+
+  ;
+
+WS
+  :
+  (
+    ' '
+  | '\t'
+  |
+    (
+      '\r' '\n'
+    | '\r'
+    | '\n'
+    )
+    {
+      newLine;
+    }
+  )
+  {
+    _ttype := TT_SKIP;
+  }
+  ;
\ No newline at end of file
diff --git a/doc/tutorial/old/multiLexer/javaParser.g
b/doc/tutorial/old/multiLexer/javaParser.g new file mode 100644 index 0000000..f46b339 --- /dev/null +++ b/doc/tutorial/old/multiLexer/javaParser.g @@ -0,0 +1,20 @@ +unit javaParser; + +parser TJavaParser; +options +{ + k = 2; + importVocab = Java; +} + +input + : ( (javadoc)? "int" ID SEMI)+ + ; + +javadoc + : + JAVADOC_OPEN + (PARAM)? + (EXCEPTION)? + JAVADOC_CLOSE + ; \ No newline at end of file diff --git a/doc/tutorial/old/multiLexer/javaParser.pas b/doc/tutorial/old/multiLexer/javaParser.pas new file mode 100644 index 0000000..8754de0 --- /dev/null +++ b/doc/tutorial/old/multiLexer/javaParser.pas @@ -0,0 +1,91 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.0.118r +// Grammar: javaparser.g +// ============================================================================ +unit javaParser; + +interface + +uses + Classes, + Contnrs, + dpgLLkParser, + dpgToken, + dpgTypes, + javaParserTokens, + SysUtils; + +type + // ========================================================================= + // Class TJavaParser declaration + // ========================================================================= + TJavaParser = class( TdpgLLkParser) + + public // Public grammar rules + procedure input ; + procedure javadoc ; + + end; + +implementation +uses + dpgException, + dpgExceptionSemantic, + dpgExceptionMismatchedToken; + +// ============================================================================ +// input +// ============================================================================ +procedure TJavaParser.input; +var + _cnt_4: integer; + +begin + _cnt_4 := 0; + + while(true) do + begin + if (( LA(1) in [LT_int,TT_JAVADOC_OPEN])) then + begin + if (( LA(1) in [TT_JAVADOC_OPEN])) then + begin + javadoc; + end; + match(LT_int); + match(TT_ID); + match(TT_SEMI); + end + 
+ else + begin + if _cnt_4 >= 1 then + break + else + Raise EdpgMismatchedToken.Create( LT(1), [LT_int,TT_JAVADOC_OPEN], FileName); + end; + + INC(_cnt_4); + end; +end; + +// ============================================================================ +// javadoc +// ============================================================================ +procedure TJavaParser.javadoc; +begin + + match(TT_JAVADOC_OPEN); + if (( LA(1) in [TT_PARAM])) then + begin + match(TT_PARAM); + end; + if (( LA(1) in [TT_EXCEPTION])) then + begin + match(TT_EXCEPTION); + end; + match(TT_JAVADOC_CLOSE); +end; + +end. diff --git a/doc/tutorial/old/multiLexer/javaParserTokens.pas b/doc/tutorial/old/multiLexer/javaParserTokens.pas new file mode 100644 index 0000000..86767a1 --- /dev/null +++ b/doc/tutorial/old/multiLexer/javaParserTokens.pas @@ -0,0 +1,25 @@ +// ============================================================================ +// This file is generated by the Delphi Parser Generator. +// ---------------------------------------------------------------------------- +// DPG version: 1.0.0.118r +// Grammar: javaparser.g +// ============================================================================ +unit javaParserTokens; + +interface + +const + TT_EOF = 1; + TT_PARAM = 4; + TT_EXCEPTION = 5; + TT_ID = 6; + TT_STAR = 7; + TT_JAVADOC_CLOSE = 8; + TT_NEWLINE = 9; + LT_int = 10; + TT_SEMI = 11; + TT_JAVADOC_OPEN = 12; + TT_WS = 13; + +implementation +end. 
diff --git a/doc/tutorial/old/multiLexer/javaParserTokens.txt b/doc/tutorial/old/multiLexer/javaParserTokens.txt new file mode 100644 index 0000000..b11fbb9 --- /dev/null +++ b/doc/tutorial/old/multiLexer/javaParserTokens.txt @@ -0,0 +1,13 @@ +// $Delphi Parser Generator: javaParser.pas -> TJavaParserTokens.txt$ +TJavaParser +TT_EOF=1 +TT_PARAM=4 +TT_EXCEPTION=5 +TT_ID=6 +TT_STAR=7 +TT_JAVADOC_CLOSE=8 +TT_NEWLINE=9 +LT_int="int"=10 +TT_SEMI=11 +TT_JAVADOC_OPEN=12 +TT_WS=13 diff --git a/doc/tutorial/old/multiLexer/test.in b/doc/tutorial/old/multiLexer/test.in new file mode 100644 index 0000000..b50f9fd --- /dev/null +++ b/doc/tutorial/old/multiLexer/test.in @@ -0,0 +1,20 @@ +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int abc; + +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int zzz; + +/** a javadoc comment + * @param foo + * @exception bar + * Just a little text for a comment + */ +int xxx; diff --git a/doc/usersguide/dpg.pdf b/doc/usersguide/dpg.pdf new file mode 100644 index 0000000..a996358 Binary files /dev/null and b/doc/usersguide/dpg.pdf differ diff --git a/doc/usersguide/dpg.tex b/doc/usersguide/dpg.tex new file mode 100644 index 0000000..7d8851d --- /dev/null +++ b/doc/usersguide/dpg.tex @@ -0,0 +1,36 @@ +\documentclass{zlbook} +\usepackage{minitoc} +%\usepackage[toc,page]{appendix} +%\usepackage{mtcoff} +\title{Delphi Parser Generator \\ user's guide} +\begin{document} +\dominitoc +\dominilof +\dominilot + +\pagestyle{empty} +\renewcommand{\thepage}{\roman{page}} +\maketitle + +\tableofcontents +%\listoftables +\renewcommand{\thepage}{\thechapter\ - \arabic{page}} +\clearpage +\pagestyle{fancy} + +\input{src/intro/intro} +\input{src/start/start} +\input{src/lang/lang} +\input{src/gram/gram} +\input{src/tokens/tokens} +\input{src/rt/rt} + + +\appendix +\renewcommand{\thepage}{\Alph{chapter} - \arabic{page}} + +%\begin{appendices} 
+\input{src/app/app-grammar} +%\end{appendices} + +\end{document} diff --git a/doc/usersguide/src/app/app-grammar.tex b/doc/usersguide/src/app/app-grammar.tex new file mode 100644 index 0000000..b811332 --- /dev/null +++ b/doc/usersguide/src/app/app-grammar.tex @@ -0,0 +1,625 @@ +\chapter{Grammar of Delphi Parser Generator} + +\clearpage \section{Lexical analyzer} +\begin{verbatim} +unit dpgDpgLexer; + +lexer TdpgDpgLexer; +options +{ + testLiterals = false; + k = 2; +} + +tokens +{ + "unit"; + "uses"; + "const"; + "type"; + + "lexer"; + "parser"; + + "options"; + "tokens"; + "memberdecl"; + "memberdef"; + + "private"; + "protected"; + "public"; + + "returns"; + "local"; + + "except"; + "finally"; + + SEMPRED; + + USES; + OPTIONS; + TOKENS; +} + +// -------------------------------------------------------- +// Simple tokens +// -------------------------------------------------------- +LPAREN: '('; +RPAREN: ')'; +RCURLY: '}'; +COLON: ':'; +SEMI: ';'; +COMMA: ','; +ASSIGN: '='; +IMPLIES: "=>"; +QUEST: '?'; +PLUS: '+'; +STAR: '*'; +NOT: '~'; +OR: '|'; +BANG: '!'; +WILDCARD: '.'; +RANGE: ".."; + +// -------------------------------------------------------- +// Character literal +// -------------------------------------------------------- +CHARLIT + : '\''! (ESC | ~'\'') '\''! ; + +// -------------------------------------------------------- +// String literal +// -------------------------------------------------------- +STRINGLIT + : '"' (ESC | ~'"')* '"' ; + +// -------------------------------------------------------- +// Integer +// -------------------------------------------------------- +INTEGER local +{ + i: integer; + v: integer; +} + : DNUMBER + { + v := 0; + for i:=1 to Length( TokenText) do + begin + v := v * 10 + ord( TokenText[i]) - ord('0'); + end; + + TokenText := IntToStr( v); + } + ; + + + + +// -------------------------------------------------------- +// Argument action +// -------------------------------------------------------- +ARGACTION + : + '['! 
+ ( + '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + | ~']' + )* + ']'! + ; + +// -------------------------------------------------------- +// Action +// -------------------------------------------------------- +ACTION + : + '{' + ( + '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + | ~'}' + )* + '}' + ( '?'! { _ttype := TT_SEMPRED; } )? + ; + +// -------------------------------------------------------- +// Token ref +// -------------------------------------------------------- +TOKENREF +options +{ + testLiterals = true; +} + : 'A'..'Z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* ; + +// -------------------------------------------------------- +// Rule ref +// -------------------------------------------------------- +RULEREF +local +{ + t: integer; +} + : + t = INT_RULEREF { _ttype := t; } + ( + {t = LT_uses}? WS_LOOP ('{' { _ttype := TT_USES; } )? + | {t = LT_options}? WS_LOOP ('{' { _ttype := TT_OPTIONS; } )? + | {t = LT_tokens}? WS_LOOP ('{' { _ttype := TT_TOKENS; } )? + )? + ; + +// -------------------------------------------------------- +// Internal rule ref +// -------------------------------------------------------- +protected INT_RULEREF returns [integer] +{ + _ttype := TT_RULEREF; +} + : 'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* + { + result := TestLiteral( _ttype); + } + ; + +// -------------------------------------------------------- +// COMMENT +// -------------------------------------------------------- +COMMENT + : SLCOMMENT { _ttype := TT_SKIP; } + | MLCOMMENT { _ttype := TT_SKIP; } + ; + +// -------------------------------------------------------- +// SLCOMMENT +// -------------------------------------------------------- +protected SLCOMMENT + : + "//" + ( ~( '\r' | '\n') )* + ( + '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + ) + ; + + + + + + +// -------------------------------------------------------- +// Multi line comment version +// Nested comments aren't allowed! 
+// -------------------------------------------------------- +protected MLCOMMENT + : + "(*" + ( + options + { + greedy = false; + } + : '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + | . + )* + "*)" + ; + +// -------------------------------------------------------- +// Numbers +// -------------------------------------------------------- +protected DNUMBER: '0'..'9' (DDIGIT)*; +protected DDIGIT: '0'..'9'; + +// -------------------------------------------------------- +// WS +// -------------------------------------------------------- +WS + : + ( + ' ' + | '\t' { tab; } + | '\r' '\n' { newLine; } + | '\r' { newLine; } + | '\n' { newLine; } + ) + { + _ttype := TT_SKIP; + } + ; + + + + + + + +// -------------------------------------------------------- +// WS_LOOP +// -------------------------------------------------------- +protected +WS_LOOP + : + ( + options + { + greedy = true; + } + : WS + | COMMENT + )* + ; + +// -------------------------------------------------------- +// Esc +// -------------------------------------------------------- +protected +ESC + : '\\'! ( 'r' | 'n' | 't' | '\'' | '"' ) + ; + +\end{verbatim} + + +\clearpage \section{Parser} +\begin{verbatim} +unit dpgDpgParser; + +parser TdpgDpgParser; +options +{ + defaultErrorHandler = false; + importVocab = dpgDpgLexer; + k = 2; +} + +// -------------------------------------------------------- +// grammar +// -------------------------------------------------------- +grammar + : "unit" id SEMI + (usesDecl)? + (constDecl)? + (typeDecl)? 
+ classDecl + ; + +// -------------------------------------------------------- +// usesDecl +// -------------------------------------------------------- +usesDecl + : USES + ( + TOKENREF SEMI + | RULEREF SEMI + )* + + RCURLY + ; + +// -------------------------------------------------------- +// constDecl +// -------------------------------------------------------- +constDecl + : "const" ACTION + ; + +// -------------------------------------------------------- +// typeDecl +// -------------------------------------------------------- +typeDecl + : "type" ACTION + ; + +// -------------------------------------------------------- +// classDecl +// -------------------------------------------------------- +classDecl +local +{ + grType: integer; +} + : + // -------------------------------------------------- + // Determine parser type + // -------------------------------------------------- + ( "lexer" { grType := 0; } + | "parser" { grType := 1; } + ) + + // -------------------------------------------------- + // get class name + // -------------------------------------------------- + id + SEMI + + // -------------------------------------------------- + // Process optional class "options {...}" clause + // -------------------------------------------------- + (classOptions)? + + // -------------------------------------------------- + // Process optional class "tokens {...}" clause + // But only for lexers. + // -------------------------------------------------- + ( {grType=0}? classTokens)? + + // -------------------------------------------------- + // Process optional class "memberDecl {...}" clause + // -------------------------------------------------- + (classMemberDecl)? 
+ + // -------------------------------------------------- + // Well, the rules + // -------------------------------------------------- + rules + + // -------------------------------------------------- + // Process optional class "memberDecl {...}" clause + // -------------------------------------------------- + (classMemberDef)? + ; +// -------------------------------------------------------- +// classOptions +// -------------------------------------------------------- +classOptions + : OPTIONS ( id ASSIGN optionValue SEMI )* RCURLY + ; + +// -------------------------------------------------------- +// classTokens +// -------------------------------------------------------- +classTokens + : + TOKENS + ( + TOKENREF SEMI + | STRINGLIT SEMI + )* + + RCURLY + ; + +// -------------------------------------------------------- +// classMemberDecl +// -------------------------------------------------------- +classMemberDecl + : "memberDecl" ACTION + ; + +// -------------------------------------------------------- +// classMemberDef +// -------------------------------------------------------- +classMemberDef + : "memberDef" ACTION + ; + +// -------------------------------------------------------- +// rules +// -------------------------------------------------------- +rules + : (rule)* + ; + +// -------------------------------------------------------- +// ruleExceptionBlock +// -------------------------------------------------------- +ruleExceptionBlock + : "except" ACTION + | "finally" ACTION + ; +// -------------------------------------------------------- +// altExceptionBlock +// -------------------------------------------------------- +altExceptionBlock + : "except" ACTION + | "finally" ACTION + ; + +// -------------------------------------------------------- +// rule +// -------------------------------------------------------- +rule + : + // -------------------------------------------------- + // Parse rule scope + // -------------------------------------------------- + 
( "public" + | "protected" + | "private" + )? + + // -------------------------------------------------- + // Parse rule name + // -------------------------------------------------- + id + + // -------------------------------------------------- + // Optional arguments + // -------------------------------------------------- + (ARGACTION)? + + // -------------------------------------------------- + // Optional return type + // -------------------------------------------------- + ("returns" ARGACTION)? + + // -------------------------------------------------- + // Optional rule options + // -------------------------------------------------- + (ruleOptions)? + + // -------------------------------------------------- + // Optional rule local variable declarations + // -------------------------------------------------- + ("local" ACTION)? + + // -------------------------------------------------- + // Optional rule init action + // -------------------------------------------------- + (ACTION)? + + // -------------------------------------------------- + // Rule block + // -------------------------------------------------- + COLON + block + SEMI + + // -------------------------------------------------- + // Optional exception handler + // -------------------------------------------------- + (ruleExceptionBlock)? + ; + +// -------------------------------------------------------- +// block +// -------------------------------------------------------- +block + : alternative (OR alternative)* + ; + +// -------------------------------------------------------- +// alternative +// -------------------------------------------------------- +alternative + : (elem)* + (altExceptionBlock)? 
+ ; + +// -------------------------------------------------------- +// elem +// -------------------------------------------------------- +elem + : element + ; + +// -------------------------------------------------------- +// element +// -------------------------------------------------------- +element +local +{ + assignLabel : IdpgToken; +} +{ + assignLabel := nil; +} + : + ( + id ASSIGN + (id COLON)? + ( + RULEREF (ARGACTION)? (BANG)? + | TOKENREF (ARGACTION)? + ) + ) + | + (assignLabel=id COLON)? + ( + RULEREF (ARGACTION)? (BANG)? + | range[assignLabel] + | terminal[assignLabel] + | NOT (notTerminal[assignLabel] | ebnf[ assignLabel, true]) + | ebnf[ assignLabel, false] + ) + | ACTION + | SEMPRED + ; + +// -------------------------------------------------------- +// range +// -------------------------------------------------------- +range [pTokenLabel: IdpgToken] +local + : + CHARLIT RANGE CHARLIT + | (TOKENREF | STRINGLIT) RANGE (TOKENREF | STRINGLIT) + ; +// -------------------------------------------------------- +// terminal +// -------------------------------------------------------- +terminal [pTokenLabel: IdpgToken] + : + CHARLIT (BANG)? + | TOKENREF (BANG)? (ARGACTION)? + | STRINGLIT (BANG)? + | WILDCARD (BANG)? + ; + +// -------------------------------------------------------- +// notTerminal +// -------------------------------------------------------- +notTerminal [pTokenLabel: IdpgToken] + : CHARLIT (BANG)? + | TOKENREF (BANG)? + ; + +// -------------------------------------------------------- +// ebnf +// -------------------------------------------------------- +ebnf [pTokenLabel: IdpgToken; pTokenNot: boolean] + : LPAREN + ( + subRuleOptions (ACTION)? COLON + | ACTION COLON + )? + + block + RPAREN + ( QUEST + | STAR + | PLUS + | IMPLIES + )? 
+ ; + +// -------------------------------------------------------- +// subruleOptions +// -------------------------------------------------------- +subruleOptions + : OPTIONS (id ASSIGN optionValue)* SEMI RCURLY + ; + +// -------------------------------------------------------- +// ruleOptions +// -------------------------------------------------------- +ruleOptions + : OPTIONS (id ASSIGN optionValue)* SEMI RCURLY + ; + +// -------------------------------------------------------- +// optionValue +// -------------------------------------------------------- +optionValue returns [IdpgToken] + : result=qualifiedId + | result:STRINGLIT + | result:CHARLIT + | result:INTEGER + ; + +// -------------------------------------------------------- +// qualifiedId +// -------------------------------------------------------- +qualifiedId returns [IdpgToken] + : id (WILDCARD id)* + ; +// -------------------------------------------------------- +// id +// -------------------------------------------------------- +id returns [IdpgToken] + : result:TOKENREF + | result:RULEREF + ; +\end{verbatim} diff --git a/doc/usersguide/src/error/error-err.tex b/doc/usersguide/src/error/error-err.tex new file mode 100644 index 0000000..a504158 --- /dev/null +++ b/doc/usersguide/src/error/error-err.tex @@ -0,0 +1,77 @@ +\section{Error handling} + +All syntactic and semantic errors cause parser exceptions to be thrown. In +particular, the methods used to match tokens in the parser base class (match et +al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to +match characters (match et al) throw analogous exceptions. + +\subsection{DPG exception hierarchy} + +DPG-generated parsers throw exceptions to signal recognition errors or other +stream problems. All exceptions derive from EdpgException. 
The hierarchy is the +following: + +\begin{verbatim} + EdpgException + EdpgMismatchedChar + EdpgMismatchedToken + EdpgSemantic +\end{verbatim} + +\subsubsection{EdpgException} The EdpgException exception class is the base of +all DPG generated exceptions. User-defined exceptions must derive from this +class. + +\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it +is looking for a character, but finds a different one on the input stream. + +\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when +it is looking for a token, but finds a different one on the input token stream. + +\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic +predicate. + +\subsection{Specifying exception handlers} + +DPG allows you to specify an exception handler for a given rule or +alternative. The general form of an exception handler specification is: + +\begin{verbatim} + ... except { code to handle exception } + ... finally { code to handle exception } +\end{verbatim} + +\subsubsection{Exception handler for a rule} + +The exception handler for a rule must be placed after the terminating +semicolon. The handler can be either an §except§ block or a §finally§ block. +The implementation of the rule will be surrounded by a try block. + +\begin{verbatim} + r : ... + ; + except { handler code } +\end{verbatim} + +\subsubsection{Exception handler for an alternative} + +The exception handler of an alternative must be the last element of the +alternative. Both exception handler blocks can be used. Every alternative that +has an exception block specified will be surrounded by a §try...except/finally§ +block. + +\begin{verbatim} + r : alternative_1 ... except { handler code } + | alternative_2 ... finally { handler code } + ... + | alternative_n + ; +\end{verbatim} + +\paragraph{Note:} It is not necessary to define an exception handler for each alternative. 
+ +\subsubsection{Default error handler in lexer} + +To skip every character that isn't recognized by any public lexer rule, specify +§filter=true§ option for a lexer. That way, the parser doesn't have to deal +with lexical errors and ask for another token. diff --git a/doc/usersguide/src/error/error.tex b/doc/usersguide/src/error/error.tex new file mode 100644 index 0000000..e69de29 diff --git a/doc/usersguide/src/gram/gram.tex b/doc/usersguide/src/gram/gram.tex new file mode 100644 index 0000000..ad2571c --- /dev/null +++ b/doc/usersguide/src/gram/gram.tex @@ -0,0 +1,53 @@ +\chapter{Grammars} +\minitoc \clearpage + +\section{Structure of a grammar} + +The generic structure of a DPG grammar is the following: +\begin{itemize} + \item \emph{unit declaration} + \item \emph{unit sections} + \item \emph{grammar class definition} + \item \emph{grammar class sections} +\end{itemize} +\paragraph{Note:} the order of blocks cannot be changed! + +\subsection{Unit declaration} +The $unit$~$declaration$ is always the first block in any DPG grammar. It +specifies the name of the target Pascal unit generated by DPG from the +grammar. The syntax is identical to that of Delphi. +\begin{alltt} + \textbf{unit} \emph{UnitName} ; +\end{alltt} + +\subsection{Unit sections} +The $unit$~$sections$ block must follow the $unit$~$declaration$ +block if it exists. The members of this block are optional, but +they must appear in the following order: +\begin{itemize} + \item \emph{uses section} + \item \emph{const section} + \item \emph{type section} +\end{itemize} + +\subsection{Grammar class definition} +This block defines the type of the grammar class. The possible types are +§lexer§ and §parser§. 
+\begin{alltt} + \textbf{lexer} \emph{myLexer} ; // define lexer +\end{alltt} +or +\begin{alltt} + \textbf{parser} \emph{myParser} ; // define parser +\end{alltt} + +\subsection{Grammar class sections} +This block may contain the following sections in the order +specified: +\begin{itemize} + \item \emph{options section} + \item \emph{tokens section} (only for lexers) + \item \emph{memberdecl section} + \item \emph{rule definitions} + \item \emph{memberdef section} +\end{itemize} diff --git a/doc/usersguide/src/intro/intro.tex b/doc/usersguide/src/intro/intro.tex new file mode 100644 index 0000000..ca872ed --- /dev/null +++ b/doc/usersguide/src/intro/intro.tex @@ -0,0 +1,63 @@ +\chapter{Introduction} +\minitoc \clearpage + +\section{Overview} +The Delphi Parser Generator is a language tool which automatically +generates $LL(k)$ parsers in Object Pascal Language based on an +intuitive grammar, similar to §EBNF§. The generated code mimics a +hand-written parser, so that it is easier to debug and leads to +shortened development time compared to state-machine based $LR$ or +DFA/NFA parsers. To compensate for the theoretical limitations of $LL(k)$ +parsers, DPG features several powerful extensions enhancing its +functionality far beyond that of standard $LL(k)$ parsers. The +method of syntactic and semantic predicates makes the writing of +meta-parsers simple and routine. The philosophy of DPG is to allow +the programmer maximum control over the parsing process while +eliminating all the routine work. + +\section{Features} +\begin{itemize} + \item[-] Delphi code generator for $LL(k)$ lexers and parsers. + \item[-] Intuitive and consistent EBNF-like syntax for both the lexer and the parser generator + resulting in a shallow learning curve. + \item[-] Extremely easy-to-read generated code indistinguishable from hand-written + parsers. The inlined statements are properly indented relative to the surrounding + program code. 
+ \item[-] Syntactic predicates allow for conditional parsing based on + formal syntactic conditions, enhancing the functionality of the $LL(k)$ parsers + considerably. + \item[-] Semantic predicates allow for conditional parsing based on + essentially arbitrary conditions. For example, a DOM-based XML parser is easily + written with semantic predicates using an internal hash-table representation of + the DOM. Using traditional state-machine based parsers (like §YACC§), programmers + often need to delegate parsing tasks to the hand-written part of the code. This + burdens them with laborious and error-prone routine work. Semantic predicates + prevent this, since the parser is allowed to use run-time information for the + parsing process dynamically. + \item[-] Actions can be inserted in the rules at every possible place. These actions can be + used for controlling the parsing process with high granularity. + \item[-] All rules may have return values and arguments. Rule arguments add a powerful + metaparsing capability completing the predicate and action mechanism optimally. + \item[-] All rules may have a code initialization section. This special feature is tuned + for Pascal to allow the programmer to declare and initialize local variables for each rule. + \item[-] Many convenient extensions to the plain §BNF§ syntax, such as §(...)§, §(...)?§, + §(...)+§, §(...)*§, which simplify the task of writing grammars and make it less + error-prone. + \item[-] Element complements allow for matching a text not matching a given rule. + \item[-] Element labels are used to directly map rule information + to Pascal variables. They provide a seamless interaction between the + generated and user-written code. + \item[-] Intuitive Graphical User Interface with syntax highlighting and + project management capabilities. +\end{itemize} + +\section{Installation} +The first step in using DPG is to install it in Delphi. 
However, before using +DPG be sure to read over the License Agreement. +\begin{itemize} + \item[-] run setup.exe and follow the instructions + \item[-] run Delphi and add your DPG run-time library directory to Delphi's + library path. For example, to do this for Delphi 6 select \emph{Tools} §|§ \emph{Environment Options} + on the menu bar. Go to the \emph{Library} tab and add the full path of your DPG run-time directory + to the \emph{Library Path} if you have not already done so. +\end{itemize} diff --git a/doc/usersguide/src/lang/lang-atomprod.tex b/doc/usersguide/src/lang/lang-atomprod.tex new file mode 100644 index 0000000..1e19091 --- /dev/null +++ b/doc/usersguide/src/lang/lang-atomprod.tex @@ -0,0 +1,40 @@ +\section{Atomic production elements} +\subsection{Character literal} +Single characters enclosed in quotes are character literals. A +character literal can only be referred to within a lexer rule. For +example, §'{'§ need not be escaped as you are specifying the +literal character which is to be matched. Meta symbols are used +outside of character and string literals to specify lexical +structure. Special characters can be specified in a similar way to +§C§ escape sequences. DPG accepts the following escape sequences: +§\n§, §\r§, §\t§, §\'§, §\"§, §\\§. The §#xx§ form is not accepted +by DPG. + +\subsection{String literal} +String literals are sequences of characters enclosed in double quotes. The same +escape sequences can be used in string literals as in character literals. +In parser rules, strings represent tokens, and each unique string is assigned +to a token type. Referring to a string within a lexer rule matches the +indicated sequence of characters and is a shorthand notation. 
For example, +consider the following equivalent lexer rule definitions: +\begin{verbatim} + BEGIN : "begin"; + BEGIN : 'b' 'e' 'g' 'i' 'n'; +\end{verbatim} + +\subsection{Wildcard} +The wildcard §.§ within a parser rule matches any single token; +within a lexer rule it matches any single character. + +\subsection{Token reference} +Identifiers beginning with an uppercase letter are treated as +token references. The subsequent characters may be a mixture of +letters, digits or underscores. Referencing a token in a parser +rule implies that you want to recognize a token with the specified +token type. This does not actually call the associated lexer rule +-- the lexical analysis phase delivers a stream of tokens to the +parser. A token reference within a lexer rule implies a method +call to that rule, and carries the same analysis semantics as a +rule reference within a parser. So, you may specify rule arguments +and return values for non-public tokens and for every parser rule. +See the next section on rule references. diff --git a/doc/usersguide/src/lang/lang-err.tex b/doc/usersguide/src/lang/lang-err.tex new file mode 100644 index 0000000..a504158 --- /dev/null +++ b/doc/usersguide/src/lang/lang-err.tex @@ -0,0 +1,77 @@ +\section{Error handling} + +All syntactic and semantic errors cause parser exceptions to be thrown. In +particular, the methods used to match tokens in the parser base class (match et +al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to +match characters (match et al) throw analogous exceptions. + +\subsection{DPG exception hierarchy} + +DPG-generated parsers throw exceptions to signal recognition errors or other +stream problems. All exceptions derive from EdpgException. The hierarchy is the +following: + +\begin{verbatim} + EdpgException + EdpgMismatchedChar + EdpgMismatchedToken + EdpgSemantic +\end{verbatim} + +\subsubsection{EdpgException} The EdpgException exception class is the base of +all DPG generated exceptions. 
User-defined exceptions must derive from this +class. + +\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it +is looking for a character, but finds a different one on the input stream. + +\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when +it is looking for a token, but finds a different one on the input token stream. + +\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic +predicate. + +\subsection{Specifying exception handlers} + +DPG allows you to specify an exception handler for a given rule or +alternative. The general form of an exception handler specification is: + +\begin{verbatim} + ... except { code to handle exception } + ... finally { code to handle exception } +\end{verbatim} + +\subsubsection{Exception handler for a rule} + +The exception handler for a rule must be placed after the terminating +semicolon. The handler can be either an §except§ block or a §finally§ block. +The implementation of the rule will be surrounded by a try block. + +\begin{verbatim} + r : ... + ; + except { handler code } +\end{verbatim} + +\subsubsection{Exception handler for an alternative} + +The exception handler of an alternative must be the last element of the +alternative. Both exception handler blocks can be used. Every alternative that +has an exception block specified will be surrounded by a §try...except/finally§ +block. + +\begin{verbatim} + r : alternative_1 ... except { handler code } + | alternative_2 ... finally { handler code } + ... + | alternative_n + ; +\end{verbatim} + +\paragraph{Note:} It is not necessary to define an exception handler for each alternative. + +\subsubsection{Default error handler in lexer} + +To skip every character that isn't recognized by any public lexer rule, specify +§filter=true§ option for a lexer. That way, the parser doesn't have to deal +with lexical errors and ask for another token. 
diff --git a/doc/usersguide/src/lang/lang-opt.tex b/doc/usersguide/src/lang/lang-opt.tex new file mode 100644 index 0000000..467940a --- /dev/null +++ b/doc/usersguide/src/lang/lang-opt.tex @@ -0,0 +1,265 @@ +\section{Options} + +The §options{...}§ section is used to specify options for grammar +elements. i.e. elements are the lexer/parser classes, rules and +subrules. This section is preceded by the options keyword and +contains a series of option/value assignments surrounded by curly +braces. + +\subsection{k} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{synopsis:} & set lookahead depth \\ + \emph{context:} & parser/lexer class declaration \\ + \emph{type:} & integer \\ + \emph{default:} & 1 + \end{tabular} +\end{table} + +For any grammar, the lookahead depth can be specified by using the $k$ option. + +\begin{verbatim} + lexer myLexer; + options + { + k = 2; + } +\end{verbatim} + +Setting the lookahead depth changes the maximum number of tokens that will be +examined to select alternative productions, and test for exit conditions of the +§EBNF§ constructs §(...)?§, §(...)+§, and §(...)*§. The lookahead analysis is +linear approximate (as opposed to full $LL(k)$ ). Consider this example with +$k=2$: +\begin{verbatim} + r : ( A B | B A ) + | A A + ; +\end{verbatim} + +Full $LL(k)$ analysis would resolve the ambiguity and produce a +lookahead test for the first alternative like: +\begin{verbatim} + if (LA(1)=A and LA(2)=B) or (LA(1)=B and LA(2)=A) +\end{verbatim} + +Linear approximate analysis would logically OR the lookahead sets at each +depth, resulting in a test like: + +\begin{verbatim} + if (LA(1)=A or LA(1)=B) and (LA(2)=A or LA(2)=B) +\end{verbatim} + +Which is ambiguous for the second alternative for §{A,A}§. +Therefore, setting the lookahead depth very high tends to yield +diminishing returns in most cases, because the lookahead sets at +large depths will include almost everything. This problem can be +solved using a syntactic predicate. 
+ + +\subsection{importVocab} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{synopsis:} & set initial grammar vocabulary \\ + \emph{context:} & parser/lexer class declaration \\ + \emph{type:} & ID \\ + \emph{default:} & none + \end{tabular} +\end{table} + +The import vocabulary for a grammar class can be specified using the +§importVocab§ option. + +\begin{verbatim} + lexer myLexer; + options + { + importVocab = XML; + } +\end{verbatim} + +DPG will look for the token exchange file named §XMLTokens.txt§, +and import all the token definitions from it. Parser grammar must +use this option, because without that, it cannot communicate with +the lexer. Lexer grammar can use this option too. It is useful, +when a parser class uses multiple lexers to get tokens from the +input stream. The vocabulary file has an identifier on the first +line that names the token vocabulary. All subsequent lines are of +the form §ID=value§ or §ID="literal"=value§. For example: + +\begin{verbatim} + ThocLexer + TT_EOF = 1 + TT_LPAREN = 4 + TT_RPAREN = 5 + LT_const = "const" = 6 +\end{verbatim} + +The token exchange file is automatically generated by DPG for each grammar. +\paragraph{Note:} you must take care of the order of grammars in a DPG project. +Vocabulary-generating grammars must appear before vocabulary-consuming +grammars. + +\subsection{exportVocab} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{synopsis:} & set export grammar vocabulary \\ + \emph{context:} & parser/lexer class declaration \\ + \emph{type:} & ID \\ + \emph{default:} & grammar class name + \end{tabular} +\end{table} + +The vocabulary of a grammar is the union of the set of tokens provided by an +§importVocab§ option and the set of tokens and literals defined in the grammar. + +\begin{verbatim} + lexer myParser; + options + { + exportVocab = XML1; + } +\end{verbatim} + +If the exportVocab options isn't specified, then DPG will use the +grammar class name to export the vocabulary. 
DPG generates the +following files for the examp\-le above: §XML1Tokens.txt§ for +token exchange, and §XML1Tokens.pas§ for the grammar class. + +\subsection{testLiterals} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{context:} & lexer class declaration, lexer rule \\ + \emph{type:} & boolean \\ + \emph{default:} & false + \end{tabular} +\end{table} + +By default, DPG doesn't generate code to check the literals table +(the table generated for literal strings), because checking the +literals table after each token recognition is expensive. Instead, +it checks string literals in a lexer rule that can recognize +them. The string literals table contains the strings defined in +the §tokens{...}§ section of a lexer grammar. +\begin{verbatim} + lexer myLexer; + options + { + testLiterals = false; + } + tokens + { + "function"; + "procedure"; + ... + } + + ID + options + { + testLiterals = true; + } + : ('A'..'Z' | 'a'..'z')('A'..'Z' | 'a'..'z' | '0'..'9')* + ; +\end{verbatim} + +In the example above, if the input is matched by the rule §ID§ +then the implementation of the rule will check the literals table +for the matched token. If it exists, then the returned token type +will be set to the token type assigned to the string literal in +the literals table. Otherwise the returned token type will remain +unchanged. + +It is possible to check the literals table explicitly within an +action using the Test\-Li\-te\-ral method: + +\begin{verbatim} + { + ... + _ttype := TestLiteral; + _ttype := TestLiteral( _ttype); + ... + } +\end{verbatim} + +\subsection{caseSensitive} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{context:} & lexer class declaration \\ + \emph{type:} & boolean \\ + \emph{default:} & false + \end{tabular} +\end{table} + +\begin{verbatim} + lexer myLexer; + options + { + caseSensitive = true; + } +\end{verbatim} + +When this option is §false§, case is ignored when comparing against character and string literals in the +lexer.
The case of the input stream is maintained when stored in the token +objects. + +\subsection{filter} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{context:} & lexer class declaration \\ + \emph{type:} & boolean / ID \\ + \emph{default:} & false + \end{tabular} +\end{table} + +\begin{verbatim} + lexer myLexer; + options + { + filter = true; + } +\end{verbatim} + +When §true§, the lexer ignores any input not exactly matching one of the public +lexer rules. + +Notice that the filter rule must track new-lines in the general +case where the lexer might emit error messages. + +When set to a rule name, the filter rule is invoked either when the lookahead +(in nextToken) predicts none of the public lexical rules or when one of those +rules fails. In the latter case, the input is rolled back before attempting +the filter rule. Option §filter=true§ is like having a filter rule such as: + +\begin{verbatim} + IGNORE : . ; +\end{verbatim} + +\subsection{ignore} +\begin{table}[H] + \small + \begin{tabular}{rl} + \emph{context:} & lexer rule \\ + \emph{type:} & ID \\ + \emph{default:} & none + \end{tabular} +\end{table} + +\begin{verbatim} + lexer myLexer; + options + { + ignore = MyIgnoreRule; + } +\end{verbatim} + +Specifies a lexer rule to use as white space between lexical rule +atomic elements (chars, strings, and rule references). The grammar +analysis, and hence the look\-ahead sets, are aware of the +whitespace references. diff --git a/doc/usersguide/src/lang/lang-prodoper.tex b/doc/usersguide/src/lang/lang-prodoper.tex new file mode 100644 index 0000000..9ee2bba --- /dev/null +++ b/doc/usersguide/src/lang/lang-prodoper.tex @@ -0,0 +1,41 @@ +\section{Production element operators} + +\subsection{Element complement} +The unary not operator $\sim$ may be applied to an atomic element +such as a token identifier. For some token atom §T§, $\sim$§T§ +matches any token other than §T§ except end-of-file.
Within lexer +rules, $\sim$§'a'§ matches any character other than the character +§'a'§. The sequence $\sim$§.§ (``not anything'') is meaningless +and not allowed. Example: +\begin{verbatim} + SL_COMMENT : "//" (~'\n')* '\n'; +\end{verbatim} + +\subsection{Set complement} +The unary not operator $\sim$ can also be used to construct a +token set or character set by complementing another set. This is +most useful when you want to match tokens or characters until a +certain delimiter set is encountered. Rather than invent a special +syntax for such sets, DPG allows the placement of $\sim$ in front +of a subrule containing only simple elements and no actions. The +simple elements may be token references, token ranges, character +literals, or character ranges. For example: +\begin{verbatim} + SL_COMMENT : "//" (~('\r'|'\n'))* ('\r'|'\n'); +\end{verbatim} + +\subsection{Range operator} +The binary range operator §..§ is used to define a range of atoms +which may be matched. The expression §c1..c2§ in a lexer matches +characters included in that range. The expression §T..U§ in a +parser matches any token whose token type is inclusively in that +range, which is of dubious value if the token types are generated +externally. + +\subsection{Ignore operator} +In lexer grammars, the ignore operator §!§ can be applied to any +atomic production element. It means that the element followed by +the §!§ operator should not appear in the result token. Example: +\begin{verbatim} + STRING : '"'! (~'"')* '"'! ; +\end{verbatim} diff --git a/doc/usersguide/src/lang/lang-rest.tex b/doc/usersguide/src/lang/lang-rest.tex new file mode 100644 index 0000000..ca509fd --- /dev/null +++ b/doc/usersguide/src/lang/lang-rest.tex @@ -0,0 +1,82 @@ +\section{Element labels} + +Any atomic production element can be labeled by an identifier (case is insignificant). +For a labeled atomic element, the identifier is used within a semantic action to access +the associated Token object or character.
For example, + +\begin{verbatim} + assign + : v:ID EQUALS expr SEMI + { + writeln('Assign to ' + v.TokenText); + } + ; +\end{verbatim} + +\section{EBNF rule elements} + +DPG supports the following extended BNF notations: +\begin{table}[H] + \small + \begin{tabular}{ll} +% \hline + §(...) § & -- exactly one occurrence of a subrule \\ + §(...)?§ & -- zero or one occurrence of a subrule \\ + §(...)+§ & -- one or more occurrences of a subrule \\ + §(...)*§ & -- zero or more occurrences of a subrule +% \hline + \end{tabular} +\end{table} + +\section{Rule arguments} +Character sequences in square brackets are arguments or return type specifiers. +Square brackets within string and character literals are not argument +delimiters. The arguments within §[]§ must follow the Object Pascal syntax. + +\section{Exception handlers} + +DPG allows the specification of exception handlers specific to a +given rule or alternative. The general form of an exception +handler specification is: + +\begin{verbatim} + ... except { code to handle exception } + ... finally { code to handle exception } +\end{verbatim} + +\subsection{Exception handler for a rule} + +The exception handler for a rule must be placed after the +terminating semicolon. The handler can be either an §except§ block +or a §finally§ block. The implementation of a rule will be +surrounded by a try block. + +\begin{verbatim} + r : ... + ; + except { handler code } +\end{verbatim} + +\subsection{Exception handler for an alternative} + +The exception handler of an alternative must be the last element +of the alternative. Both exception handler blocks can be used. +Every alternative that has an exception block will be surrounded +by a §try...except/finally§ block. + +\begin{verbatim} + r : alternative_1 ... except { handler code } + | alternative_2 ... finally { handler code } + ... + | alternative_n + ; +\end{verbatim} + +\paragraph{Note:} It is not necessary to define an exception handler for each alternative.
+ +\subsection{Default error handler in lexer} + +To skip every character that isn't recognized by any public lexer +rule, specify the option §filter=true§ for a lexer. That way, the +parser doesn't have to deal with lexical errors and ask for +another token. diff --git a/doc/usersguide/src/lang/lang-sect.tex b/doc/usersguide/src/lang/lang-sect.tex new file mode 100644 index 0000000..9b2a081 --- /dev/null +++ b/doc/usersguide/src/lang/lang-sect.tex @@ -0,0 +1,249 @@ +\section{Sections} + +\subsection{unit} +The unit section specifies the unit name of the generated source file. +The syntax is identical to Object Pascal. + +\subsection{uses} +The §uses{...}§ section is used to specify the units which must be +included in the interface's uses clause of the generated Pascal +unit. Every unit name must be terminated by a semicolon. Repeated +units are included only once. +\begin{verbatim} + uses + { + Classes; + Windows; + } +\end{verbatim} + + +\subsection{const} +The §const{...}§ section is used to specify items that appear in +the interface's const clause of the generated Pascal unit. The +content of this section is copied verbatim into the unit. +\begin{verbatim} + const + { + const1 = 12; + const2 = 'FOO'; + } +\end{verbatim} + +\subsection{type} +The §type{...}§ section is used to specify items that appear in +the interface's type clause of the generated Pascal unit. The +content of this section is copied verbatim into the unit. +\begin{verbatim} + type + { + TmyType1 = integer; + TmyType2 = array [0..16] of TmyType1; + } +\end{verbatim} + +\subsection{options} +The §options{...}§ section contains options for a given grammar +element. Options can be defined for lexer/parser classes, rules +and subrules. + +\subsection{tokens} +If you need to define an ``imaginary'' token (i.e. one that has no +corresponding real input symbol) use the §tokens{...}§ section to +define it. You can also define literals in this section.
+ +\begin{verbatim} + tokens + { + "procedure"; + "function"; + INTEGER; + } +\end{verbatim} + +Strings defined in this way are treated just as if you had referenced them in +the parser. The formal syntax is: + +\begin{verbatim} + tokenSpecification + : "tokens" + LCURLY + (tokenItem SEMI)* + RCURLY + ; + + tokenItem + : TOKEN + | STRING + ; +\end{verbatim} + +The §tokens{...}§ section is only valid in lexer grammars. + +\subsection{memberdecl} +The §memberdecl{...}§ section contains additional member +declarations for the grammar class. It allows the expansion of the +grammar class with user defined members, so it is not necessary to +derive new classes from the generated class to implement +additional functionality. The content of this section is copied +verbatim into the class declaration of the generated grammar +class. +\begin{verbatim} + memberdecl + { + procedure proc1; + procedure proc2; + } +\end{verbatim} + +\subsection{memberdef} +The §memberdef{...}§ section contains the implementation of the +classes' additional functionality. The content of this section is +copied verbatim into the implementation part of the generated +unit. This section may also contain the initialization and +finalization clauses. + +\begin{verbatim} + memberdef + { + procedure TmyClass.proc1; + begin + ... + end; + + procedure TmyClass.proc2; + begin + ... + end; + } +\end{verbatim} + +\subsection{parser} +Parser rules must be associated with a parser class. Each parser +class specification precedes the options, and rule definitions of +the parser. Grammar files §.g§ can hold only one class definition. +A parser specification in a grammar file looks like: +\begin{verbatim} + unit myParser; + uses... // optional uses {...} section + const... // optional const {...} section + type... // optional type {...} section + + parser TmyParser; + + options... // optional options {...} section + memberdecl... // optional memberdecl {...} section + parser rules... + memberdef... 
// optional memberdef {...} section +\end{verbatim} + +In the generated code, the parser class results in an Object +Pascal class, and the rules become member methods of the class. + +Note, that the content of the §memberdecl{...}§ section is copied +verbatim into the class declaration part of the generated parser +class while the content of the §memberdef{...}§ section is copied +after the implementation of the member rules, so the +initialization and finalization clauses of a pascal unit can be +placed in the §memberdef{...}§ section. + +\subsection{lexer} +To perform lexical analysis, you need to specify a lexer class that describes +how to break up the input character stream into a stream of tokens. The syntax +is similar to that of a parser class: +\begin{verbatim} + unit myLexer; + uses... // optional uses {...} section + const... // optional const {...} section + type... // optional type {...} section + + lexer TmyLexer; + + options... // optional options {...} section + tokens... // optional tokens {...} section + memberdecl... // optional memberdecl {...} section + lexer rules... + memberdef... // optional memberdef {...} section +\end{verbatim} + +Lexical rules contained within a lexer class become member methods in the +generated class. A lexer grammar may have a §tokens{...}§ section to specify +imaginary tokens and string literals. + +\subsection{rule definitions} +The structure of an input stream of atoms is specified by a set of +mutually-referenced rules. Each rule has a name and any of the +following optional attributes: a scope specifier; a set of +arguments; an init-action; a return value; local variable +definitions; an exception handler and an alternative or +alternatives. Each alternative contains a series of elements that +specify what to match and where. Scope can be specified by +private, protected, or public keywords. A rule has public scope by +default. 
The basic form of a rule is: +\begin{verbatim} + (scope) rulename + : alternative_1 + | alternative_2 + ... + | alternative_n + ; +\end{verbatim} + +Parameters for a rule can be specified in the following form: +\begin{verbatim} + rulename [formal parameters] : ... ; +\end{verbatim} + +If the rule returns a value, its type can be defined with the +§returns§ keyword: +\begin{verbatim} + rulename returns [typename] : ... ; +\end{verbatim} + +where §typename§ is a valid Object Pascal type specifier. + +Local variables for a rule can be defined in the §local{...}§ section: + +\begin{verbatim} + rule + local + { + foo: integer; + bar: string; + } +\end{verbatim} + +Init-actions are specified before the colon. Init-actions differ from normal +actions because they are always executed regardless of guess mode. + +\begin{verbatim} + rule + { + init-action + } + : ... ; +\end{verbatim} + + +\paragraph{Parser rules} apply structure to a stream of tokens, whereas +lexer rules apply structure to a stream of characters. Parser +rules, therefore, must not reference cha\-rac\-ter literals. +Double-quoted strings in parser rules are considered to be token +references. Note: all parser rules must begin with a lowercase +letter. + +\paragraph{Lexer rules} defined within a lexer grammar must have a name beginning +with an uppercase letter. These rules implicitly match +cha\-rac\-ters on the input stream instead of tokens on the token +stream. Referenced grammar elements include token references +(implicit lexer rule references), cha\-rac\-ters and strings. +Lexer rules are processed in the same manner as parser rules, and +may also specify arguments and return values. A scope specifier +for a lexer rule has special meaning in lexer grammars. In the +generated Object Pascal unit, the lexer class has a §nextToken§ +function which is the interface between the lexer and the parser. +This function is synthesized from the public lexer rules.
It means +that non-public lexer rules don't modify the prediction logic of +the lexer. They are usually helper rules. If the lexer grammar has +no public rule at all, the §nextToken§ function returns EOF to the +parser. diff --git a/doc/usersguide/src/lang/lang-simpprod.tex b/doc/usersguide/src/lang/lang-simpprod.tex new file mode 100644 index 0000000..4891eaf --- /dev/null +++ b/doc/usersguide/src/lang/lang-simpprod.tex @@ -0,0 +1,79 @@ +\section{Simple production elements} +\subsection{Rule reference} +Identifiers beginning with a lowercase letter are treated as parser +rule references. The subsequent characters may be any letter, +digit, or underscore. Lexical rules may not reference +parser rules. Referencing a rule implies a method call to that +rule at that point in the parse. You may pass parameters and +obtain return values. For example, formal and actual parameters +are specified within square brackets: +\begin{verbatim} + function + : type ID LPAREN args RPAREN block [1] + ; + + block [scope: integer] + : LCURLY + ... + { (* use arg 'scope' *) } + ... + RCURLY + ; +\end{verbatim} + +Return values that are stored in variables use a simple assignment +notation: +\begin{verbatim} + set + local + { + ids : TStringList; + } + { + ids := nil; + } + : LPAREN ids=idList RPAREN + ; + + idList returns [TStringList] + { + result := TStringList.Create; + } + : id:ID { result.Add( id.TokenText); } + ( + COMMA id:ID + { + result.Add( id.TokenText); + } + )* + ; +\end{verbatim} + +\subsection{Semantic action} +Actions are blocks of Object Pascal source code enclosed in curly braces. The +code is executed after the preceding production element has been recognized and +before the recognition of the following element. Actions are typically used to +generate out\-put, construct trees, or modify a symbol table. An action's +position dictates when it is recognized relative to the surrounding grammar +elements.
+ +If the action is the first element of a production, it is executed +before any other e\-le\-ment in that production, but only if that +production is predicted by the lookahead. + +The first action of an §EBNF§ subrule may be followed by §:§. +Doing so de\-sig\-na\-tes the action as an init-action and +associates it with the subrule as a whole, instead of any +production. It is executed immediately upon entering the subrule, +and is executed even while guessing (testing syntactic +predicates). For example: + +\begin{verbatim} + ( { init-action} : + { action of 1st production} production1 + | { action of 2nd production} production2 + )? +\end{verbatim} + +The init-action would be executed regardless of what (if anything) +matched in the optional subrule. diff --git a/doc/usersguide/src/lang/lang-syntactic.tex b/doc/usersguide/src/lang/lang-syntactic.tex new file mode 100644 index 0000000..f5fa83e --- /dev/null +++ b/doc/usersguide/src/lang/lang-syntactic.tex @@ -0,0 +1,49 @@ +Delphi Parser Generator (DPG) uses the ASCII character set, +including the letters \emph{A} through \emph{Z} and \emph{a} +through \emph{z}, the digits \emph{0} through \emph{9}, and other +standard characters. It is case sensitive. The space character +(ASCII 32), the tab character (ASCII 9), and the new-line +characters (ASCII 13,10) are called \emph{white-space} characters. + +\section{General} +\subsection{Comments} +DPG accepts single and multi-line comments. Single-line comments begin with +§//§ while multi-line (block) comments are enclosed by §(*§~and~§*)§. + +\subsection{White Space} +Spaces, tabs, and new-lines (including most used §CR-LF§, §CR§, +§LF§ constructions) are separators in that they separate DPG +symbols, such as identifiers. White spaces have no additional +significance i.e. the code layout does not play any semantical +role. However the layout of the embedded Delphi code is preserved +in the ge\-ne\-ra\-ted source files. 
+ +\subsection{Symbols} +DPG uses the following punctuation and keywords: + +\begin{table}[H] + \small + \begin{center} + \begin{tabular}{|ll|ll|} + \hline + §(...)§ & subrule & §unit§ & unit name \\ + §(...)*§ & closure subrule & §uses§ & uses section \\ + §(...)+§ & positive closure & §const§ & const section \\ + §(...)?§ & optional subrule & §type§ & type section \\ + §[...]§ & rule arguments & §lexer§ & lexer class \\ + §{...}§ & semantic action & §parser§ & parser class \\ + §{...}?§ & semantic predicate & §options§ & options section \\ + §(...)=>§ & syntactic predicate & §tokens§ & tokens section \\ + § |§ & alternative operator & §returns§ & rule return value \\ + § ..§ & range operator & §except§ & exception handler \\ + § ~§ & not operator & §finally§ & exception handler \\ + § !§ & ignore operator & §memberdecl§ & member declaration \\ + § .§ & wildcard & §memberdef§ & member definition \\ + § =§ & assignment operator & §local§ & local rule variables \\ + § :§ & label, start rule & & \\ + § ;§ & end rule & & \\ + \hline + \end{tabular} + \end{center} + \caption{DPG symbols} +\end{table} diff --git a/doc/usersguide/src/lang/lang.tex b/doc/usersguide/src/lang/lang.tex new file mode 100644 index 0000000..4c4d0a1 --- /dev/null +++ b/doc/usersguide/src/lang/lang.tex @@ -0,0 +1,10 @@ +\chapter{Syntactic elements} +\minitoc +\clearpage +\include{src/lang/lang-syntactic} +\include{src/lang/lang-atomprod} +\include{src/lang/lang-simpprod} +\include{src/lang/lang-prodoper} +\include{src/lang/lang-sect} +\include{src/lang/lang-opt} +\include{src/lang/lang-rest} diff --git a/doc/usersguide/src/rt/rt-err.tex b/doc/usersguide/src/rt/rt-err.tex new file mode 100644 index 0000000..cb68c21 --- /dev/null +++ b/doc/usersguide/src/rt/rt-err.tex @@ -0,0 +1,94 @@ +\section{Error handling} + +All syntactic and semantic errors throw exceptions. In particular, +the methods used to match tokens in the parser base class (match +etc) throw §EdpgMismatchedToken§. 
The methods in the lexer base +class used to match characters (match etc.) throw exceptions +similarly. + +\subsection{DPG exception hierarchy} + +DPG-generated parsers throw exceptions to signal recognition +errors or other stream problems. All exceptions derive from +EdpgException. The hierarchy is as follows: + +\begin{verbatim} + EdpgException + EdpgMismatchedChar + EdpgMismatchedToken + EdpgSemantic +\end{verbatim} + +\subsection{EdpgException} +The §EdpgException§ is the base class for all DPG exceptions. It +defines the following read-only properties: +\begin{alltt} + FileName : string; + Line : integer; + Column : integer; +\end{alltt} +These properties contain information about the location where the exception +occurred. + +\subsection{EdpgMismatchedChar} +The §EdpgMismatchedChar§ exception is thrown by the lexer when it +is looking for a character, but finds a different one on the input +stream than expected. It defines the following properties in +addition to those of §EdpgException§. +\begin{alltt} + FoundChar : char; + FoundString : string; + CharSet : TdpgCharSet; + Str : string; + Inverted : boolean; +\end{alltt} +The §FoundChar§ and §FoundString§ properties contain the character +or string that was found on the input stream. The §CharSet§ and +§Str§ properties contain the values which the lexer expected to +find. The §Inverted§ property is set only if the exception came +from a §MatchNot(...)§ operation. In this case, the §CharSet§ +property contains the values that the lexer must \emph{not} match. The +validity of the pro\-per\-ti\-es is shown in the next table, +depending on the kind of exception.
+ +\begin{table}[H] + \small + \begin{center} + \begin{tabular}{lcc} + & Mismatched char & Mismatched string \\ + \hline + FoundChar & valid & - \\ + FoundString & - & valid \\ + CharSet & valid & - \\ + Str & - & valid \\ + Inverted & valid & - \\ + \hline + \end{tabular} + \end{center} +\end{table} + +\subsection{EdpgMismatchedToken} +The §EdpgMismatchedToken§ exception is thrown by the parser when +it is looking for a token, but finds a different one on the input +token stream than expected. It defines the following properties in +addition to those of §EdpgException§. +\begin{alltt} + FoundToken : IdpgToken; + TokenSet : TdpgByteSet; + Inverted : boolean; +\end{alltt} +The §FoundToken§ property contains the token the parser received from the +lexer. The §TokenSet§ property contains the values the parser expected to +get. The §Inverted§ property is set only if the exception came from a +§MatchNot(...)§ operation. In this case, the §TokenSet§ property contains the +values the parser must \emph{not} get. + +\subsection{EdpgSemantic} +This exception is thrown by a validating semantic predicate. It +defines the following property in addition to those of +§EdpgException§. +\begin{alltt} + Assert : string; +\end{alltt} +The §Assert§ property contains the validating expression that caused the +exception. diff --git a/doc/usersguide/src/rt/rt.tex b/doc/usersguide/src/rt/rt.tex new file mode 100644 index 0000000..50c0ab8 --- /dev/null +++ b/doc/usersguide/src/rt/rt.tex @@ -0,0 +1,3 @@ +\chapter{Run-time} +\minitoc \clearpage +\include{src/rt/rt-err} diff --git a/doc/usersguide/src/start/start.tex b/doc/usersguide/src/start/start.tex new file mode 100644 index 0000000..956c1c5 --- /dev/null +++ b/doc/usersguide/src/start/start.tex @@ -0,0 +1,200 @@ +\chapter{Getting started} +\minitoc \clearpage + +In this chapter, we develop a simple calculator. It accepts integers, the four +arithmetic operators (§+§,§-§,§/§,§*§), and parentheses on its input.
+Spaces, tabs and newline characters are treated as white space and used for +separating tokens. Complete expressions must be terminated by semicolons. + +\section{Lexical analyzer} + +Let us define the calculator's lexer. + +\begin{verbatim} + 1 unit myLexer; + 2 + 3 lexer TmyLexer; + 4 options + 5 { + 6 exportVocab = myLexer; + 7 } +\end{verbatim} +In line §1§ we define the unit name of the generated Pascal source +file for the lexer. In line §3§ we give a name to the lexer class. +If there is an §options§ block for a grammar class, it must follow +the class declaration. Here, we define one option for the lexer: +§exportVocab§. This option tells DPG that all the token +definitions must be exported to §myLexerTokens.txt§ and +§myLexerTokens.pas§. Grammars can import the generated token names +using the exported §.txt§ files. + +\paragraph{Note:} it is not necessary to define the §exportVocab§ option for a +grammar. The file names for the token exchange files are automatically created +using the specified unit name. + +Now we define the lexer tokens. + +\begin{verbatim} + 8 LPAREN: '('; + 9 RPAREN: ')'; + 10 PLUS: '+'; + 11 MINUS: '-'; + 12 STAR: '*'; + 13 SLASH: '/'; + 14 SEMI: ';'; +\end{verbatim} +In lines from §8§ to §14§, there are simple token definitions. Each of them +recognizes one character from the input stream. + +\begin{verbatim} + 15 INT: ('0'..'9')+ ; +\end{verbatim} +In line §15§, we define a rule to recognize integer numbers. It states that +an INT consists of one or more numeric characters. + +Now, we define a rule to handle white space characters. +\begin{verbatim} + 16 WS + 17 : '\r' '\n' { _ttype := TT_SKIP; } + 18 | '\t' { _ttype := TT_SKIP; } + 19 | ' ' { _ttype := TT_SKIP; } + 20 ; +\end{verbatim} +Characters surrounded by curly braces are actions. The content of +an action block will be copied verbatim into the generated Pascal +source file.
In this example the statement §_ttype := TT_SKIP;§ +prevents the §WS§ rule from generating a token, because we don't need +it. + +Now the lexer definition is finished. This simple lexer recognizes the relevant +characters and integers, and skips all white space on its input. + +\section{Parser} + +Now we define the parser. + +\begin{verbatim} + 1 unit myParser; + 2 + 3 parser TmyParser; + 4 options + 5 { + 6 importVocab = myLexer; + 7 } +\end{verbatim} +This part is analogous to the lexer definition with one exception. In +line §6§, we import the tokens from a file specified by the +§exportVocab§ option in the lexer grammar. Now, the parser knows +which tokens are to be expected from the lexer. + +\begin{verbatim} + 8 memberdecl + 9 { + 10 value: integer; + 11 } +\end{verbatim} +In lines from §8§ to §11§, we specify the §memberdecl§ section. This section is +used to define members for the generated parser class. In this example, the §TmyParser§ +class will have a member called §value§. We use this member to store the result +of the calculation for the current expression. + +Now we define the rules. +\begin{verbatim} + 12 calc + 13 : (expression SEMI { writeln( value); } )* + 14 ; + 15 + 16 expression + 17 local + 18 { + 19 temp : integer; + 20 } + 21 : term { temp := value; } + 22 ( + 23 PLUS term { temp := temp + value; } + 24 | MINUS term { temp := temp - value; } + 25 )* { value := temp; } + 26 ; +\end{verbatim} +In lines §17..20§, we define a local variable for the rule +§expression§. The following rules are defined in a similar way to +the rule §expression§.
+ +\begin{verbatim} + 27 term + 28 local + 29 { + 30 temp : integer; + 31 } + 32 : factor { temp := value; } + 33 ( + 34 STAR factor { temp := temp * value; } + 35 | SLASH factor { temp := temp div value; } + 36 )* { value := temp; } + 37 ; + 38 + 39 factor + 40 local + 41 { + 42 temp : integer; + 43 } + 44 : uInt + 45 | LPAREN expression RPAREN + 46 ; + 47 + 48 uInt + 49 : x:INT { value := StrToInt( x.TokenText); } + 50 ; +\end{verbatim} +In line §49§, we specified that the rule must have a variable +called 'x' which will contain the INT token. For the moment, it is +enough to know that it has a property §TokenText§ which contains +the text of the recognized token. This property is a string +property, so we have to convert it to an integer, and store it in +the §value§ member variable. + +\section{The project} +The following simple project demonstrates how the defined lexer +and parser classes are used. + +\begin{verbatim} + 1 program calc; + 2 {$APPTYPE CONSOLE} + 3 uses + 4 Classes, + 5 SysUtils, + 6 myLexer in 'myLexer.pas', + 7 myParser in 'myParser.pas'; + 8 + 9 var + 10 stm: TFileStream; + 11 lex: TmyLexer; + 12 par: TmyParser; + 13 + 14 begin + 15 if ParamCount <> 1 then + 16 begin + 17 writeln('usage: calc '); + 18 exit; + 19 end + 20 else + 21 begin + 22 try + 23 stm := TFileStream.Create( ParamStr(1), + 24 fmOpenRead); + 25 lex := TmyLexer.Create(stm); + 26 par := TmyParser.Create(lex); + 27 + 28 par.calc; + 29 except + 30 on EdpgMismatchedToken do + 31 writeln('Syntax error'); + 32 on EdpgMismatchedChar do + 33 writeln('Syntax error'); + 34 end; + 35 end; + 36 + 37 stm.Free; + 38 par.Free; + 39 end. +\end{verbatim} diff --git a/doc/usersguide/src/tokens/tokens.tex b/doc/usersguide/src/tokens/tokens.tex new file mode 100644 index 0000000..8d66a1b --- /dev/null +++ b/doc/usersguide/src/tokens/tokens.tex @@ -0,0 +1,281 @@ +\chapter{Tokens} +\minitoc \clearpage +\section{Overview} +Tokens are the basic building blocks of any parser or compiler. 
+The task of a lexer (lexical analyzer, scanner) is to break up the +input character stream into a stream of tokens. The §nextToken§ +method of a lexer passes the next token to the parser, or throws +an exception if the next character on the input stream cannot be +matched by any of the public lexer rules. The §nextToken§ method +is always synthesized from the public lexer rules. + +§Tokens§ in DPG are interface pointers. The interface type is §IdpgToken§, +which defines the following properties: +\begin{verbatim} + IdpgToken = interface + ... + property TokenText : string; + property TokenType : integer; + property TokenLine : integer; + property TokenColumn : integer; + ... + end; +\end{verbatim} + +where §TokenText§ is the text matched by the lexer; §TokenType§ is +the type of token assigned to the token by DPG; §TokenLine§ is the +line number where the token starts in the input stream; +§TokenColumn§ is the column number. + +Within parser rules, the input token can be accessed via this interface. To +obtain the interface to the recognized token, the reference to the token must +be prefixed by a label. For example, +\begin{verbatim} + ... + x:NUMBER + { + ... + LogMsg( 'Token: ' + x.TokenText ); + LogMsg( 'Type: ' + IntToStr(x.TokenType)); + ... + } + ... +\end{verbatim} + +Note: Variables for labels are always generated by DPG, so you should not define +them in the §local{...}§ section of the rule. + +\section{Defining tokens} +In DPG, tokens can be defined in the lexer grammars. DPG always +generates a token exchange file that describes all the token types +matched by the lexer. This file can be imported in a parser +grammar, so the lexer and parser have the same token types. Tokens +can be defined either, +\begin{itemize} + \item[-] via lexer rules, or + \item[-] in the lexer's §tokens{...}§ section +\end{itemize} + +\subsubsection{Defining a token using a lexer rule} +The commonest method of defining a token is using a lexer rule. 
In +lexer grammars, every rule is associated with a §TokenType§ which +is determined by DPG at compile time. This value is assigned to +the result token by default, but it can be modified in the given +rule if needed. This is used mostly in rules that need runtime +information to set the type of the result token, but is otherwise +uncommon. + +There is one exception: when a rule must not generate a token at all. +This is useful for defining comments or white space for a grammar. +Every lexer rule has a local variable called §_ttype§. If +§_ttype§ has a value of §TT_SKIP§, then the rule won't generate any token. For +example, +\begin{verbatim} + SLCOMMENT : "//" ( ~'\n')* '\n' { _ttype := TT_SKIP; } ; +\end{verbatim} + +The following examples are normal lexer rules, and they are typical in lexers: +\begin{verbatim} + LPAREN: '('; + RPAREN: ')'; + DIGIT: '0'..'9'; + NUMBER: DIGIT (DIGIT)*; + LETTER: 'a'..'z' | 'A'..'Z'; + ID: LETTER (LETTER | DIGIT | '_')*; +\end{verbatim} + +\subsubsection{Defining a token in the tokens\{...\} section} + +Lexer grammars may have a §tokens{...}§ section in the class +declaration. Within this section you can define ``imaginary'' +tokens and string literals. These tokens are not ``real'' tokens +and cannot be referenced in lexer rules. ``Imaginary'' tokens are +helpful when a rule can recognize more than one type of token and +defining rules for these tokens would be ambiguous. For example, + +\begin{verbatim} +tokens +{ + STRING; + CHAR; +} +// ======================================================== +// String or char +// ======================================================== +STRING_OR_CHAR +: '\'' (~'\'' | '\'' '\'')* '\'' + { + if TokenText = '''''' then _ttype := TT_STRING + else if TokenText = '''''''''' then _ttype := TT_CHAR + else if Length( TokenText) > 3 then _ttype := TT_STRING + else _ttype := TT_CHAR; + } +; +\end{verbatim} +The rule §STRING_OR_CHAR§ recognizes a Pascal character literal, +and a Pascal string literal. 
The code in the action block decides +which type of token must be created by the rule. Note: These +tokens are ``imaginary'' tokens. Referencing them in lexer +grammars is not possible, because they have no implementation. +Within parser rules, the tokens §STRING§ and §CHAR§ can be +referenced. But §STRING_OR_CHAR§ can't be referenced, because this +rule creates a §STRING§ or a §CHAR§ token. + +\paragraph{String literals} in the §tokens{...}§ section are useful when the language +defines keywords. In this case you can list your language's keywords in this +section. They will be put into the lexer's literals table. The lexer will consult +this table in the following cases: +\begin{itemize} + \item[-] if the §testLiterals§ option for the lexer class is true, the lexer checks the + literals table after each recognized token, + \item[-] if the §testLiterals§ option for the lexer class is false, the + check will be executed in rules, that have this option set. +\end{itemize} + +If neither lexer rules nor lexer class have this option set, the +lexer's literals table can be explicitly checked via the +§TestLiterals§ method. The advantage of using string literals is +that you can reference them in the parser as they are defined in +the §tokens{...}§ section. For example, + +\begin{verbatim} + ... + lexer TmyLexer; + options + { + testLiterals = true; + } + tokens + { + ... + "function"; + "procedure"; + ... + } + ... + + parser TmyParser; + rule1 : "function" ID SEMI; + rule2 : "procedure" ID LPAREN args RPAREN SEMI; + ... +\end{verbatim} +In the above example we set the §testLiterals§ option to true for the lexer +class. This is not recommended, because the lexer will check the literals table +even if it found a non-string token. Instead, you have to check the table in a +rule that can recognize these literals. For example: + +\begin{verbatim} + ... + lexer TmyLexer; + ... 
+ + ID + options + { + testLiterals=true; + } + : ('a'..'z' | 'A'..'Z') ('a'..'z' | 'A'..'Z' | '0'..'9')* + ; +\end{verbatim} +Here the literals table will only be consulted in the rule §ID§. +This will improve the lexer's speed. Of course you can set the +§testLiterals§ option to true for as many rules as you want. All +of them will check the literals table. + +\paragraph{Note:} The §testLiterals§ option has no effect for parser rules. + + +\section{User defined token classes} + +By default, DPG uses the §TdpgToken§ class to represent tokens. +This class is derived from §TInterfacedObject§, and implements the +§IdpgToken§ interface. This interface is used across the generated +code. To define a new token class you must derive your new class +from §TdpgToken§, implement your interface to access and +manipulate your object, and finally tell the lexer that it must +create your type of token object instead of the default +§TdpgToken§. After that, within the rules you must obtain the +interface of your class and use it. Let us have a more detailed +look at this: + +1. Create a token class: +\begin{verbatim} + ImyToken = interface( IdpgToken) + [a guid definition] + + function Get_MyString : string; + procedure Set_MyString( AString: string); + + property MyString : string read Get_MyString + write Set_MyString; + end; + + TmyToken = class( TdpgToken, + IdpgToken, + ImyToken) + protected + fMyString : string; + + function Get_MyString : string; + procedure Set_MyString( AString: string); + + public + constructor Create( pType: integer; + pText: string); override; + + end; + + constructor TmyToken.Create( pType: integer; + pText: string); + begin + inherited; + ... + your code here + ... + end; + + function TmyToken.Get_MyString: string; + begin + result := fMyString; + end; + + procedure TmyToken.Set_MyString( AString: string); + begin + fMyString := AString; + end; +\end{verbatim} + +2. Tell the lexer that it must use our token class. 
+\begin{verbatim} + uses myToken; + ... + myLexer.TokenClass := TmyToken; +\end{verbatim} + +3. Use it in a rule. +\begin{verbatim} + ... + parser TmyParser; + + rule1 + : + "procedure" x:id (LPAREN params RPAREN)? + { + (x as ImyToken).MyString := 'procid'; + } + ; + +\end{verbatim} + +\paragraph{Note:} You must cast the returned interface to your token interface, +because the §makeToken§ method of the lexer always returns an §IdpgToken§ +interface and the labels specified to obtain a reference to a token are always +§IdpgToken§ references. + +\paragraph{Note:} If you have to do special actions to initialize your token +class, you must have the same constructor as defined in the +example. The §makeToken§ method of the lexer always creates tokens +with this constructor. If you have another kind of constructor for +your token class, it won't be used by the lexer. + +\clearpage diff --git a/doc/usersguide/zlbook.cls b/doc/usersguide/zlbook.cls new file mode 100644 index 0000000..d8d8d0d --- /dev/null +++ b/doc/usersguide/zlbook.cls @@ -0,0 +1,88 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesClass{zlbook} +\LoadClass[a4paper,twoside,11pt]{book} +\usepackage{times} +\usepackage{chappg} +\usepackage{here} +\usepackage{alltt} +\usepackage[bookman]{quotchap} +\RequirePackage{shortvrb} +\MakeShortVerb{\§} + +\let\o@verbatim\verbatim + +\def\verbatim{% + \ifhmode\unskip\par\fi +% \nopagebreak % Overridden by list penalty + \ifx\@currsize\normalsize + \small + \fi + \o@verbatim +} + +% No paragraph indentation, space between paragraphs +\setlength{\parindent}{0pt} +\setlength{\parskip}{\medskipamount} + +\renewcommand{\thepage}{\thechapter\ - \arabic{page}} + +\usepackage{fancyhdr} +\pagestyle{fancy} +%\addtolength{\headwidth}{0.5in} +%\addtolength{\headwidth}{\marginparsep} +%\addtolength{\headwidth}{\marginparwidth} +\renewcommand{\chaptermark}[1]{\markboth{#1}{}} +\renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}} +\fancyhf{} +%\fancyhead[LE,RO]{\bfseries\thepage} 
+%\fancyhead[RO]{\bfseries\rightmark} +%\fancyhead[LE]{\bfseries\leftmark} +\fancyfoot[RO]{\bfseries\thepage} +\fancyfoot[LE]{\bfseries\thepage} + +\fancyhead[RO]{\rightmark} +\fancyhead[LE]{\leftmark} +\fancyfoot[RO]{\thepage} +\fancyfoot[LE]{\thepage} + +\fancypagestyle{plain}{% + \fancyhf{} + \renewcommand{\headrulewidth}{0pt} + \renewcommand{\footrulewidth}{0pt} +} + +\renewcommand{\headrulewidth}{0.4pt} +\renewcommand{\footrulewidth}{0.4pt} +%\renewcommand{\normalsize}{\fontsize{10pt}{12pt}\selectfont} + + +\def\cleardoublepage{\clearpage\if@twoside \ifodd\c@page\else + \hbox{} + \vspace*{\fill} + \begin{center} +% This page intentionally left blank. + \end{center} + \vspace{\fill} + \thispagestyle{empty} + \newpage + \if@twocolumn\hbox{}\newpage\fi\fi\fi} + +%\addtolength{\textwidth}{1cm} + + +\newenvironment{decl}[1][]% + {\par\small\addvspace{4.5ex plus 1ex}% + \vskip -\parskip + \ifx\relax#1\relax + \def\@decl@date{}% + \else + \def\@decl@date{\NEWfeature{#1}}% + \fi + \noindent\hspace{-\leftmargini}% + \begin{tabular}{|l|}\hline\ignorespaces}% + {\\\hline\end{tabular}\nobreak\@decl@date\par\nobreak + \vspace{2.3ex}\vskip -\parskip} + +\newcommand{\NEWfeature}[1]{% + \hskip 1sp \marginpar{\small\sffamily\raggedright + New feature\\#1}}