Initial check in docu

This commit is contained in:
2026-01-03 18:31:15 +01:00
parent e2c3cbc520
commit ee130973e2
98 changed files with 9430 additions and 0 deletions
+36
View File
@@ -0,0 +1,36 @@
program calc;
{$APPTYPE CONSOLE}
// Console driver for the calculator demo: opens the file named on the command
// line, runs it through TcalcLexer / TcalcParser and lets the parser's `calc`
// rule do the work.
uses
  Classes,
  SysUtils,
  calcLexer in 'calcLexer.pas',
  calcParser in 'calcParser.pas';
var
  stm: TFileStream;
  lex: TcalcLexer;
  par: TcalcParser;
begin
  if ParamCount <> 1 then
  begin
    writeln('usage: calc <filename>');
    exit;
  end;

  stm := nil;
  par := nil;
  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      lex := TcalcLexer.Create(stm);
      par := TcalcParser.Create(lex);
      par.calc;
    except
      // Report the failure instead of swallowing it: a missing file or a
      // syntax error previously ended the program without any output.
      on E: Exception do
      begin
        writeln('error: ', E.ClassName, ': ', E.Message);
        ExitCode := 1;
      end;
    end;
  finally
    // Release the parser before the stream it (indirectly) reads from.
    // NOTE(review): `lex` is not freed here, exactly as in the original;
    // its ownership (parser-owned or reference-counted) is not visible
    // from this file - confirm before adding a Free.
    par.Free;
    stm.Free;
  end;
end.
+5
View File
@@ -0,0 +1,5 @@
1+2+3+4+5+6+7+8+9;
(((((2+3)))));
(-1*(-2*(-3*(-4+ -5))));
(-1*(-2*(-3*(-4+ 5))));
7 * -(-9);
+46
View File
@@ -0,0 +1,46 @@
// ============================================================================
// Demo lexer for a four operator calculator
// ============================================================================
unit calcLexer;
lexer TcalcLexer;
options
{
exportVocab = calcLexer;
k = 2;
}
// ============================================================================
// Simple tokens
//
// Punctuation and operator tokens. Each rule matches exactly one literal
// character; the four operators are the ones the calculator parser uses.
// ============================================================================
LPAREN : '(';
RPAREN : ')';
PLUS : '+';
MINUS : '-';
STAR : '*';
SLASH : '/';
SEMI : ';';
// ============================================================================
// INT
//
// Unsigned integer literal: one or more decimal digits. No sign is matched
// here; PLUS and MINUS are separate tokens handled by the parser.
// ============================================================================
INT : ('0'..'9')+;
// ============================================================================
// White space
//
// Matches one white-space item: a line break (CR LF, lone CR or lone LF), a
// tab, or a blank. Line breaks call newLine and tabs call tab (presumably the
// lexer's line/column bookkeeping - confirm in the runtime). The trailing
// action sets the token type to TT_SKIP for every alternative, so this rule
// never hands a WS token to the parser.
// ============================================================================
WS
:
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| '\t' { tab; }
| ' '
)
{
_ttype := TT_SKIP;
}
;
+92
View File
@@ -0,0 +1,92 @@
// ============================================================================
// Demo parser for four operator calculator
// ============================================================================
unit calcParser;
parser TcalcParser;
options
{
importVocab = calcLexer;
exportVocab = calcParser;
}
// Parser member used as the single "result register": every rule below leaves
// the value it computed in `value`, and the calling rule reads it from there.
memberdecl
{
value : integer;
}
// ============================================================================
// calc
//
// Entry rule: one or more expressions, each terminated by SEMI. After each
// expression the result left in `value` is printed with writeln.
// ============================================================================
calc
: (expression SEMI {writeln(value);} )+
;
// ============================================================================
// expression
//
// Currently a plain alias for simpleExpression; the result stays in `value`.
// ============================================================================
expression
: simpleExpression
;
// ============================================================================
// simpleExpression
//
// A term followed by any number of "+ term" / "- term" pairs, evaluated left
// to right. Each `term` leaves its result in the member `value`; the local
// `temp` accumulates the running total, which is written back to `value`
// when the loop ends.
// ============================================================================
simpleExpression
local
{
temp: integer;
}
: term { temp := value; }
(
PLUS term { temp := temp + value; }
| MINUS term { temp := temp - value; }
)* { value := temp; }
;
// ============================================================================
// term
//
// A factor followed by any number of "* factor" / "/ factor" pairs, evaluated
// left to right. SLASH uses the integer `div` operator, so the quotient is an
// integer. The local `temp` accumulates the running product and is written
// back to `value` at the end.
// ============================================================================
term
local
{
temp: integer;
}
: factor { temp := value; }
(
STAR factor { temp := temp * value; }
| SLASH factor { temp := temp div value; }
)* { value := temp; }
;
// ============================================================================
// factor
//
// An optional unary sign followed by either an unsigned integer or a
// parenthesised expression. The local `s` holds the sign (initialised to 1,
// set to -1 by MINUS); after the operand has been evaluated into `value`, the
// final action multiplies `value` by `s`.
// ============================================================================
factor
local
{
s: integer;
}
{
s := 1;
}
:
(
PLUS { s := 1; }
| MINUS { s := -1; }
)?
(
uInt
| LPAREN expression RPAREN
)
{
value := s * value;
}
;
// ============================================================================
// uInt
//
// Matches one INT token (labelled `x`) and stores StrToInt of its text in
// `value`.
// ============================================================================
uInt
: x:INT { value := StrToInt( x.TokenText); }
;
+10
View File
@@ -0,0 +1,10 @@
To build the demo project, you must first compile the grammars.
1. Open calc.dpp in DPG
2. Press F9 to compile the grammars
After the compilation, the project can be opened in Delphi. Make sure that the
DPG runtime library is in the Delphi library path (either in the project
settings or in the environment settings).
Have fun...
+43
View File
@@ -0,0 +1,43 @@
unit filter;
lexer Tfilter;
options
{
k = 2;
filter = true;
}
// ----------------------------------------------------------------------------
// Paragraph
//
// Matches the literal tag "<p>" and produces a P token.
// ----------------------------------------------------------------------------
P
: "<p>"
;
// ----------------------------------------------------------------------------
// Break
//
// Matches the literal tag "<br>" and produces a BR token.
// ----------------------------------------------------------------------------
BR
: "<br>"
;
// ----------------------------------------------------------------------------
// Newline
//
// Matches one line break (CR LF, lone CR or lone LF). Every alternative calls
// newLine and sets the token type to TT_SKIP, so no NEWLINE token is ever
// returned to the caller.
// ----------------------------------------------------------------------------
NEWLINE
:
(
'\r' '\n' { newLine; _ttype := TT_SKIP; }
| '\r' { newLine; _ttype := TT_SKIP; }
| '\n' { newLine; _ttype := TT_SKIP; }
)
;
// ----------------------------------------------------------------------------
// Tab
//
// Matches a single tab character, calls tab, and marks the token as TT_SKIP
// so that it is not returned to the caller.
// ----------------------------------------------------------------------------
TAB
: '\t' { tab; _ttype := TT_SKIP; }
;
+234
View File
@@ -0,0 +1,234 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.78r
// Grammar: filter.g
// ============================================================================
unit filter;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTypes,
filterTokens,
SysUtils;
type
// =========================================================================
// Class Tfilter declaration
// =========================================================================
// Lexer class generated from filter.g. Each m<RULE> method implements one
// lexer rule; NextToken dispatches to them based on the lookahead characters.
Tfilter = class( TdpgLexer)
protected // Public grammar rules ("rescoped")
// pCreate: when true (and the rule did not mark itself TT_SKIP) the rule
// builds a token and leaves it in fReturnToken; otherwise nil is stored.
procedure mP ( pCreate: boolean);
procedure mBR ( pCreate: boolean);
procedure mNEWLINE ( pCreate: boolean);
procedure mTAB ( pCreate: boolean);
public
// Returns the next non-skipped token, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mP
// ============================================================================
procedure Tfilter.mP( pCreate: boolean);
// Lexer rule P: matches the literal '<p>'. When pCreate is true a TT_P token
// carrying the matched text is left in fReturnToken; otherwise nil is stored.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  // Everything in fText from this index onwards belongs to this rule.
  startIdx := Length(fText) + 1;
  kind := TT_P;

  match('<p>');

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mBR
// ============================================================================
procedure Tfilter.mBR( pCreate: boolean);
// Lexer rule BR: matches the literal '<br>'. When pCreate is true a TT_BR
// token carrying the matched text is left in fReturnToken; otherwise nil.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  // Everything in fText from this index onwards belongs to this rule.
  startIdx := Length(fText) + 1;
  kind := TT_BR;

  match('<br>');

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mNEWLINE
// ============================================================================
procedure Tfilter.mNEWLINE( pCreate: boolean);
// Lexer rule NEWLINE: consumes one line break (CR LF, lone CR or lone LF),
// calls newLine and marks the token as TT_SKIP, so fReturnToken ends up nil.
// Raises EdpgMismatchedChar when the next character is neither CR nor LF.
var
  startIdx: integer;
  kind: integer;
  crLf: boolean;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_NEWLINE;

  if LA(1) in [#13] then
  begin
    // Decide between CR LF and a lone CR before anything is consumed.
    crLf := LA(2) in [#10];
    match(#13);
    if crLf then
      match(#10);
  end
  else if LA(1) in [#10] then
    match(#10)
  else
    raise EdpgMismatchedChar.Create(LA(1), [#10,#13], FileName, Line, Column);

  // Common action of all three alternatives.
  newLine;
  kind := TT_SKIP;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mTAB
// ============================================================================
procedure Tfilter.mTAB( pCreate: boolean);
// Lexer rule TAB: consumes one tab character, calls tab and marks the token
// as TT_SKIP, so fReturnToken ends up nil.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_TAB;

  match(#9);
  tab;
  kind := TT_SKIP;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
function Tfilter.NextToken : IdpgToken;
// Returns the next token of the filtered input. Characters that start no rule
// are consumed and ignored, rules that end up as TT_SKIP (nil token) are
// retried, and any exception raised while matching causes one character to be
// consumed before scanning resumes. At end of input a TT_EOF token is returned.
var
  haveToken: boolean;
begin
  repeat
    haveToken := false;
    ResetText;
    try
      if (LA(1) in ['<']) and (LA(2) in ['p']) then
      begin
        mP(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if (LA(1) in ['<']) and (LA(2) in ['b']) then
      begin
        mBR(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if LA(1) in [#10,#13] then
      begin
        mNEWLINE(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if LA(1) in [#9] then
      begin
        mTAB(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if LA(1) = EOF_CHAR then
      begin
        uponEof;
        result := TdpgToken.Create(TT_EOF);
        haveToken := result <> nil;
      end
      else
        // No rule starts here: drop the character and keep scanning.
        consume;
    except
      // A rule failed part-way through: skip one character and try again.
      consume;
    end;
  until haveToken;
end;
end.
+19
View File
@@ -0,0 +1,19 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.78r
// Grammar: filter.g
// ============================================================================
unit filterTokens;
interface
// Token type codes produced by the Tfilter lexer (generated from filter.g).
// Codes 2 and 3 are not defined in this unit.
const
TT_EOF = 1;
TT_P = 4;
TT_BR = 5;
TT_NEWLINE = 6;
TT_TAB = 7;
implementation
end.
+7
View File
@@ -0,0 +1,7 @@
// $Delphi Parser Generator: filter.pas -> TfilterTokens.txt$
Tfilter
TT_EOF=1
TT_P=4
TT_BR=5
TT_NEWLINE=6
TT_TAB=7
+367
View File
@@ -0,0 +1,367 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javadoclexer.g
// ============================================================================
unit JavaDocLexer;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTokenStreamSelector,
dpgTypes,
JavaDocTokens,
SysUtils;
type
// =========================================================================
// Class TJavaDocLexer declaration
// =========================================================================
// Lexer class generated from javadoclexer.g. Each m<RULE> method implements
// one lexer rule; NextToken dispatches to them based on the lookahead.
TJavaDocLexer = class( TdpgLexer)
public
// Token stream selector; mJAVADOC_CLOSE calls Selector.Pop after matching '*/'.
Selector : IdpgTokenStreamSelector;
protected // Protected grammar rules
// Helper rule, only invoked from mPARAM and mEXCEPTION (never from NextToken).
procedure mID ( pCreate: boolean);
protected // Public grammar rules ("rescoped")
// pCreate: when true (and the rule did not mark itself TT_SKIP) the rule
// builds a token and leaves it in fReturnToken; otherwise nil is stored.
procedure mPARAM ( pCreate: boolean);
procedure mEXCEPTION ( pCreate: boolean);
procedure mSTAR ( pCreate: boolean);
procedure mJAVADOC_CLOSE ( pCreate: boolean);
procedure mNEWLINE ( pCreate: boolean);
public
// Returns the next non-skipped token, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mPARAM
// ============================================================================
procedure TJavaDocLexer.mPARAM( pCreate: boolean);
// Lexer rule PARAM: "@param" (' ')+ ID. The identifier is matched by mID
// without creating its own token, so its text becomes part of the PARAM token.
// Raises EdpgMismatchedChar when no blank follows '@param'.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_PARAM;

  match('@param');

  // (' ')+ : at least one blank is required, further blanks are optional.
  if not (LA(1) in [' ']) then
    raise EdpgMismatchedChar.Create(LA(1), [' '], FileName, Line, Column);
  repeat
    match(' ');
  until not (LA(1) in [' ']);

  mID(false);

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mID
// ============================================================================
procedure TJavaDocLexer.mID( pCreate: boolean);
// Protected lexer rule ID: one or more characters in 'a'..'z'.
// Raises EdpgMismatchedChar when the first character is not in that range.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_ID;

  // ('a'..'z')+ : the first letter is mandatory, the rest are optional.
  if not (LA(1) in ['a'..'z']) then
    raise EdpgMismatchedChar.Create(LA(1), ['a'..'z'], FileName, Line, Column);
  repeat
    match( ['a'..'z']);
  until not (LA(1) in ['a'..'z']);

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mEXCEPTION
// ============================================================================
procedure TJavaDocLexer.mEXCEPTION( pCreate: boolean);
// Lexer rule EXCEPTION: "@exception" (' ')+ ID. The identifier is matched by
// mID without creating its own token, so its text becomes part of this token.
// Raises EdpgMismatchedChar when no blank follows '@exception'.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_EXCEPTION;

  match('@exception');

  // (' ')+ : at least one blank is required, further blanks are optional.
  if not (LA(1) in [' ']) then
    raise EdpgMismatchedChar.Create(LA(1), [' '], FileName, Line, Column);
  repeat
    match(' ');
  until not (LA(1) in [' ']);

  mID(false);

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mSTAR
// ============================================================================
procedure TJavaDocLexer.mSTAR( pCreate: boolean);
// Lexer rule STAR: consumes a single '*' and marks it TT_SKIP, so
// fReturnToken ends up nil and no STAR token reaches the caller.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_STAR;

  match('*');
  kind := TT_SKIP;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mJAVADOC_CLOSE
// ============================================================================
procedure TJavaDocLexer.mJAVADOC_CLOSE( pCreate: boolean);
// Lexer rule JAVADOC_CLOSE: matches '*/' and then calls Selector.Pop
// (presumably handing control back to the previously active lexer - confirm
// in the selector implementation). The token is created after the Pop.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_JAVADOC_CLOSE;

  match('*/');
  Selector.Pop;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mNEWLINE
// ============================================================================
procedure TJavaDocLexer.mNEWLINE( pCreate: boolean);
// Lexer rule NEWLINE: consumes one line break (CR LF, lone CR or lone LF),
// calls newLine and marks the token as TT_SKIP, so fReturnToken ends up nil.
// Raises EdpgMismatchedChar when the next character is neither CR nor LF.
var
  startIdx: integer;
  kind: integer;
  crLf: boolean;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_NEWLINE;

  if LA(1) in [#13] then
  begin
    // Decide between CR LF and a lone CR before anything is consumed.
    crLf := LA(2) in [#10];
    match(#13);
    if crLf then
      match(#10);
  end
  else if LA(1) in [#10] then
    match(#10)
  else
    raise EdpgMismatchedChar.Create(LA(1), [#10,#13], FileName, Line, Column);

  newLine;
  kind := TT_SKIP;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
function TJavaDocLexer.NextToken : IdpgToken;
// Returns the next token found inside a javadoc comment. Characters that
// start no rule are consumed and ignored, rules that end up as TT_SKIP (nil
// token) are retried, and any exception raised while matching causes one
// character to be consumed before scanning resumes. At end of input a TT_EOF
// token is returned.
var
  la1 : char;
  la2 : char;
  haveToken : boolean;
begin
  repeat
    haveToken := false;
    ResetText;
    try
      // Fetch both lookahead characters once; nothing is consumed between
      // here and the dispatch below, so the cached values stay current.
      la1 := LA(1);
      la2 := LA(2);
      if (la1 in ['@']) and (la2 in ['p']) then
      begin
        mPARAM(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if (la1 in ['@']) and (la2 in ['e']) then
      begin
        mEXCEPTION(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if (la1 in ['*']) and (la2 in ['/']) then
      begin
        mJAVADOC_CLOSE(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if la1 in ['*'] then
      begin
        mSTAR(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if la1 in [#10,#13] then
      begin
        mNEWLINE(true);
        result := fReturnToken;
        haveToken := result <> nil;
      end
      else if la1 = EOF_CHAR then
      begin
        uponEof;
        result := TdpgToken.Create(TT_EOF);
        haveToken := result <> nil;
      end
      else
        // No rule starts here: drop the character and keep scanning.
        consume;
    except
      // A rule failed part-way through: skip one character and try again.
      consume;
    end;
  until haveToken;
end;
end.
+21
View File
@@ -0,0 +1,21 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javadoclexer.g
// ============================================================================
unit JavaDocTokens;
interface
// Token type codes produced by TJavaDocLexer (generated from javadoclexer.g).
// Codes 2 and 3 are not defined in this unit.
const
TT_EOF = 1;
TT_PARAM = 4;
TT_EXCEPTION = 5;
TT_ID = 6;
TT_STAR = 7;
TT_JAVADOC_CLOSE = 8;
TT_NEWLINE = 9;
implementation
end.
@@ -0,0 +1,9 @@
// $Delphi Parser Generator: JavaDocLexer.pas -> TJavaDocLexerTokens.txt$
TJavaDocLexer
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
+286
View File
@@ -0,0 +1,286 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javalexer.g
// ============================================================================
unit JavaLexer;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTokenStreamSelector,
dpgTypes,
JavaTokens,
SysUtils;
type
// =========================================================================
// Class TJavaLexer declaration
// =========================================================================
// Lexer class generated from javalexer.g. Each m<RULE> method implements one
// lexer rule; NextToken dispatches to them based on the lookahead character.
TJavaLexer = class( TdpgLexer)
public
// Token stream selector; mJAVADOC_OPEN calls Selector.Push('docLexer')
// after matching '/**'.
Selector : IdpgTokenStreamSelector;
protected // Internals
// Registers the literal 'int' in fLiterals (see the implementation).
procedure initialize; override;
protected // Public grammar rules ("rescoped")
// pCreate: when true (and the rule did not mark itself TT_SKIP) the rule
// builds a token and leaves it in fReturnToken; otherwise nil is stored.
procedure mSEMI ( pCreate: boolean);
procedure mJAVADOC_OPEN ( pCreate: boolean);
procedure mID ( pCreate: boolean);
procedure mWS ( pCreate: boolean);
public
// Returns the next non-skipped token, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mSEMI
// ============================================================================
procedure TJavaLexer.mSEMI( pCreate: boolean);
// Lexer rule SEMI: matches ';'. When pCreate is true a TT_SEMI token carrying
// the matched text is left in fReturnToken; otherwise nil is stored.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_SEMI;

  match(';');

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mJAVADOC_OPEN
// ============================================================================
procedure TJavaLexer.mJAVADOC_OPEN( pCreate: boolean);
// Lexer rule JAVADOC_OPEN: matches '/**' and then calls
// Selector.Push('docLexer') (presumably making the javadoc lexer the active
// token source - confirm in the selector implementation). The token is
// created after the Push.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_JAVADOC_OPEN;

  match('/**');
  Selector.Push('docLexer');

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mID
// ============================================================================
procedure TJavaLexer.mID( pCreate: boolean);
// Lexer rule ID: one or more characters in 'a'..'z'. After matching, the
// token type is passed through testLit, which may replace TT_ID (the grammar
// enables testLiterals for this rule).
// Raises EdpgMismatchedChar when the first character is not in 'a'..'z'.
var
  startIdx: integer;
  kind: integer;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_ID;

  // ('a'..'z')+ : the first letter is mandatory, the rest are optional.
  if not (LA(1) in ['a'..'z']) then
    raise EdpgMismatchedChar.Create(LA(1), ['a'..'z'], FileName, Line, Column);
  repeat
    match( ['a'..'z']);
  until not (LA(1) in ['a'..'z']);

  kind := testLit(kind);

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mWS
// ============================================================================
procedure TJavaLexer.mWS( pCreate: boolean);
// Lexer rule WS: consumes one blank, tab or line break (CR LF, lone CR or
// lone LF); line breaks additionally call newLine. The token is always marked
// TT_SKIP, so fReturnToken ends up nil.
// Raises EdpgMismatchedChar when the next character is not white space.
var
  startIdx: integer;
  kind: integer;
  crLf: boolean;
  tok: IdpgToken;
begin
  startIdx := Length(fText) + 1;
  kind := TT_WS;

  if LA(1) in [' '] then
    match(' ')
  else if LA(1) in [#9] then
    match(#9)
  else if LA(1) in [#13] then
  begin
    // Decide between CR LF and a lone CR before anything is consumed.
    crLf := LA(2) in [#10];
    match(#13);
    if crLf then
      match(#10);
    newLine;
  end
  else if LA(1) in [#10] then
  begin
    match(#10);
    newLine;
  end
  else
    raise EdpgMismatchedChar.Create(LA(1), [#9..#10,#13,' '], FileName, Line, Column);

  kind := TT_SKIP;

  tok := nil;
  if pCreate and (kind <> TT_SKIP) then
  begin
    tok := makeToken(kind);
    tok.TokenText := Copy(fText, startIdx, Length(fText) - startIdx + 1);
  end;
  fReturnToken := tok;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
function TJavaLexer.NextToken : IdpgToken;
// Returns the next token of the Java input. Rules that end up as TT_SKIP
// (nil token) are retried. A character that starts no rule raises
// EdpgMismatchedChar, which propagates to the caller. At end of input a
// TT_EOF token is returned.
var
  expected : TdpgCharSet;
begin
  // Characters that can start a token; reported in the mismatch exception.
  expected := [#9..#10,#13,' ','/',';','a'..'z'];
  repeat
    ResetText;
    if LA(1) in [';'] then
    begin
      mSEMI(true);
      result := fReturnToken;
    end
    else if LA(1) in ['/'] then
    begin
      mJAVADOC_OPEN(true);
      result := fReturnToken;
    end
    else if LA(1) in ['a'..'z'] then
    begin
      mID(true);
      result := fReturnToken;
    end
    else if LA(1) in [#9..#10,#13,' '] then
    begin
      mWS(true);
      result := fReturnToken;
    end
    else if LA(1) = EOF_CHAR then
    begin
      uponEof;
      result := TdpgToken.Create(TT_EOF);
    end
    else
      raise EdpgMismatchedChar.Create(LA(1), expected, FileName, Line, Column);
    // Every path that gets here assigned result in this iteration; a nil
    // result means the rule was skipped, so scan again.
  until result <> nil;
end;
// ----------------------------------------------------------------------------
// InitLiterals
// ----------------------------------------------------------------------------
procedure TJavaLexer.initialize;
// Registers the literal 'int' with its token type code (LT_int = 10 in
// JavaTokens) in fLiterals.
begin
  fLiterals.Add('int', LT_int);
end;
end.
+25
View File
@@ -0,0 +1,25 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javalexer.g
// ============================================================================
unit JavaTokens;
interface
// Token type codes used by TJavaLexer (generated from javalexer.g). Codes 4..9
// carry the same names and values as in JavaDocTokens; 10..13 are added here.
// LT_int is the code registered for the literal 'int'.
const
TT_EOF = 1;
TT_PARAM = 4;
TT_EXCEPTION = 5;
TT_ID = 6;
TT_STAR = 7;
TT_JAVADOC_CLOSE = 8;
TT_NEWLINE = 9;
LT_int = 10;
TT_SEMI = 11;
TT_JAVADOC_OPEN = 12;
TT_WS = 13;
implementation
end.
+13
View File
@@ -0,0 +1,13 @@
// $Delphi Parser Generator: JavaLexer.pas -> TJavaLexerTokens.txt$
TJavaLexer
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
LT_int="int"=10
TT_SEMI=11
TT_JAVADOC_OPEN=12
TT_WS=13
+49
View File
@@ -0,0 +1,49 @@
program demo;
{$APPTYPE CONSOLE}
// Console driver for the multi-lexer demo: the Java lexer and the javadoc
// lexer share one input state and are switched through a token stream
// selector that feeds TJavaParser.
uses
  Classes,
  SysUtils,
  dpgTokenStreamSelector,
  javaLexer,
  javaDocLexer,
  javaParser;
var
  stm : TFileStream;
  lexJava : TJavaLexer;
  lexJavaDoc : TJavaDocLexer;
  parJava : TJavaParser;
  sel : TdpgTokenStreamSelector;
begin
  if ParamCount <> 1 then
  begin
    // Previously a wrong argument count ended the program without a word.
    writeln('usage: demo <filename>');
    exit;
  end;

  stm := nil;
  parJava := nil;
  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      sel := TdpgTokenStreamSelector.Create;
      lexJava := TJavaLexer.Create( stm);
      // Both lexers read from the same input state.
      lexJavaDoc := TJavaDocLexer.Create( lexJava.InputState);
      lexJava.Selector := sel;
      lexJavaDoc.Selector := sel;
      sel.add( lexJava, 'main');
      sel.add( lexJavaDoc, 'docLexer');
      sel.select( 'main');
      parJava := TJavaParser.Create( sel);
      parJava.input;
    except
      // Keep the original marker text but say what actually went wrong.
      on E: Exception do
        writeln('Exception... ', E.ClassName, ': ', E.Message);
    end;
  finally
    // Release the parser before the stream the lexers read from.
    // NOTE(review): the lexers and the selector are not freed here, as in
    // the original; the selector is also held through interface references,
    // so its lifetime may be reference-counted - confirm before adding Free.
    parJava.Free;
    stm.Free;
  end;
end.
+76
View File
@@ -0,0 +1,76 @@
unit JavaDocLexer;
uses
{
dpgTokenStreamSelector;
}
lexer TJavaDocLexer;
options
{
k = 2;
exportVocab = JavaDoc;
filter = true;
}
memberdecl
{
public
Selector : IdpgTokenStreamSelector;
}
// ----------------------------------------------------------------------------
// @param
//
// Matches "@param", one or more blanks, and an identifier. ID is a protected
// rule, so the identifier text becomes part of the PARAM token.
// ----------------------------------------------------------------------------
PARAM
: "@param" (' ')+ ID
;
// ----------------------------------------------------------------------------
// @exception
//
// Matches "@exception", one or more blanks, and an identifier. ID is a
// protected rule, so the identifier text becomes part of the EXCEPTION token.
// ----------------------------------------------------------------------------
EXCEPTION
: "@exception" (' ')+ ID
;
// ----------------------------------------------------------------------------
// identifier
//
// One or more lower-case letters. Declared protected: it is only used from
// PARAM and EXCEPTION and is never returned as a token of its own.
// ----------------------------------------------------------------------------
protected ID
: ('a'..'z')+
;
// ----------------------------------------------------------------------------
// Star
//
// This rule simply prevents JAVADOC_CLOSE from being called for every '*' in
// a comment. Calling JAVADOC_CLOSE will fail for a simple '*' and cause an
// exception, which is slow. In other words, the grammar will work without
// this rule, but is slower.
//
// The action marks the token as TT_SKIP, so no STAR token is ever returned.
// ----------------------------------------------------------------------------
STAR
: '*' { _ttype := TT_SKIP; }
;
// ----------------------------------------------------------------------------
// JavaDocClose
//
// Matches the end of a javadoc comment and calls Selector.Pop (presumably
// returning control to the lexer that was active before the comment started -
// confirm in the selector implementation).
// ----------------------------------------------------------------------------
JAVADOC_CLOSE
: "*/" { Selector.Pop; }
;
// ----------------------------------------------------------------------------
// NewLine
//
// Matches one line break (CR LF, lone CR or lone LF). The shared action calls
// newLine and marks the token as TT_SKIP, so no NEWLINE token is returned.
// ----------------------------------------------------------------------------
NEWLINE
:
(
'\r' '\n'
| '\r'
| '\n'
)
{
newLine;
_ttype := TT_SKIP;
}
;
+68
View File
@@ -0,0 +1,68 @@
unit JavaLexer;
uses
{
dpgTokenStreamSelector;
}
lexer TJavaLexer;
options
{
k = 2;
importVocab = JavaDoc;
exportVocab = Java;
}
tokens
{
"int";
}
memberdecl
{
public
Selector : IdpgTokenStreamSelector;
}
// ----------------------------------------------------------------------------
// Simple tokens
//
// SEMI matches the statement terminator ';'.
// ----------------------------------------------------------------------------
SEMI : ';';
// ----------------------------------------------------------------------------
// JavaDocOpen
//
// Matches the start of a javadoc comment and calls Selector.Push('docLexer').
// 'docLexer' is the name under which the demo program registers the javadoc
// lexer with the selector.
// ----------------------------------------------------------------------------
JAVADOC_OPEN
: "/**" { Selector.Push('docLexer'); }
;
// ----------------------------------------------------------------------------
// Identifier
//
// One or more lower-case letters. testLiterals is enabled, so the matched
// text is checked against the literals declared in the tokens section
// ("int") and the token type is replaced when it matches.
// ----------------------------------------------------------------------------
ID
options
{
testLiterals = true;
}
: ('a'..'z')+
;
// ----------------------------------------------------------------------------
// White space
//
// Matches one blank, tab or line break (CR LF, lone CR or lone LF). Line
// breaks call newLine; every alternative ends with the token marked TT_SKIP,
// so no WS token is returned to the parser.
// ----------------------------------------------------------------------------
WS
:
(
' '
| '\t'
|
(
'\r' '\n'
| '\r'
| '\n'
)
{
newLine;
}
)
{
_ttype := TT_SKIP;
}
;
+20
View File
@@ -0,0 +1,20 @@
unit javaParser;
parser TJavaParser;
options
{
k = 2;
importVocab = Java;
}
// Entry rule: one or more declarations of the form `int <ID> ;`, each
// optionally preceded by a javadoc comment.
input
: ( (javadoc)? "int" ID SEMI)+
;
// A javadoc comment as seen by the parser: the opening token, an optional
// PARAM, an optional EXCEPTION (in that order), and the closing token.
javadoc
:
JAVADOC_OPEN
(PARAM)?
(EXCEPTION)?
JAVADOC_CLOSE
;
+91
View File
@@ -0,0 +1,91 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javaparser.g
// ============================================================================
unit javaParser;
interface
uses
Classes,
Contnrs,
dpgLLkParser,
dpgToken,
dpgTypes,
javaParserTokens,
SysUtils;
type
// =========================================================================
// Class TJavaParser declaration
// =========================================================================
// Parser class generated from javaparser.g; one method per grammar rule.
TJavaParser = class( TdpgLLkParser)
public // Public grammar rules
// Entry rule: ( (javadoc)? "int" ID SEMI )+
procedure input ;
// JAVADOC_OPEN (PARAM)? (EXCEPTION)? JAVADOC_CLOSE
procedure javadoc ;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedToken;
// ============================================================================
// input
// ============================================================================
procedure TJavaParser.input;
// Parser rule input: ( (javadoc)? "int" ID SEMI )+
// Raises EdpgMismatchedToken when the first token starts neither a javadoc
// comment nor an `int` declaration.
begin
  // At least one declaration is required.
  if not (LA(1) in [LT_int,TT_JAVADOC_OPEN]) then
    raise EdpgMismatchedToken.Create(LT(1), [LT_int,TT_JAVADOC_OPEN], FileName);

  repeat
    // Optional leading javadoc comment.
    if LA(1) in [TT_JAVADOC_OPEN] then
      javadoc;
    match(LT_int);
    match(TT_ID);
    match(TT_SEMI);
  until not (LA(1) in [LT_int,TT_JAVADOC_OPEN]);
end;
// ============================================================================
// javadoc
// ============================================================================
procedure TJavaParser.javadoc;
// Parser rule javadoc: JAVADOC_OPEN (PARAM)? (EXCEPTION)? JAVADOC_CLOSE
begin
  match(TT_JAVADOC_OPEN);

  // Both tags are optional, but must appear in this order when present.
  if LA(1) in [TT_PARAM] then
    match(TT_PARAM);
  if LA(1) in [TT_EXCEPTION] then
    match(TT_EXCEPTION);

  match(TT_JAVADOC_CLOSE);
end;
end.
@@ -0,0 +1,25 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javaparser.g
// ============================================================================
unit javaParserTokens;
interface
// Token type codes used by TJavaParser (generated from javaparser.g). The
// names and values are the same as those declared in JavaTokens.
const
TT_EOF = 1;
TT_PARAM = 4;
TT_EXCEPTION = 5;
TT_ID = 6;
TT_STAR = 7;
TT_JAVADOC_CLOSE = 8;
TT_NEWLINE = 9;
LT_int = 10;
TT_SEMI = 11;
TT_JAVADOC_OPEN = 12;
TT_WS = 13;
implementation
end.
@@ -0,0 +1,13 @@
// $Delphi Parser Generator: javaParser.pas -> TJavaParserTokens.txt$
TJavaParser
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
LT_int="int"=10
TT_SEMI=11
TT_JAVADOC_OPEN=12
TT_WS=13
+20
View File
@@ -0,0 +1,20 @@
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int abc;
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int zzz;
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int xxx;
+10
View File
@@ -0,0 +1,10 @@
To build a project you must first compile the grammars.
After the compilation, the project can be opened in Delphi. Make sure that the
DPG runtime library is in the Delphi library path (either in the project
settings or in the environment settings).
Have fun...
P.S.: I'm not sure whether the grammar is correct.
It is only meant to demonstrate DPG features.
+359
View File
@@ -0,0 +1,359 @@
{
* A Child-Sibling Tree.
*
* A tree with PLUS at the root and with two children 3 and 4 is
* structured as:
*
* PLUS
* |
* 3 -- 4
*
* and can be specified easily in LISP notation as
*
* (PLUS 3 4)
*
* where every '(' starts a new subtree.
*
* These trees are particularly useful for translators because of
* the flexibility of the children lists. They are also very easy
* to walk automatically, whereas trees with specific children
* reference fields can't easily be walked automatically.
*
* This class contains the basic support for an AST.
* Most people will create ASTs that are subclasses of
* BaseAST or of CommonAST.
*/
}
unit ast;
interface
uses
Classes,
Generics.Collections;
// dpgrtl.Token;
type
TAST = class;
TASTList = TList<TAST>;
// Base node of a child-sibling tree: every node links to its first child
// (fDown) and to its next sibling (fRight).
TAST = class
protected
fDown : TAST;           // first child, exposed as FirstChild
fRight : TAST;          // next sibling, exposed as NextSibling
fAstText : AnsiString;  // node text, compared by GetEquals
fAstType : integer;     // node type, compared by GetEquals
fAstLine : integer;
fAstColumn : integer;
private
fVerbose : boolean; // verbose string conversion
fTokenNames : TStringList; // created in AfterConstruction, freed in BeforeDestruction
private
// Walks NodeToSearch, its siblings and all their descendants, testing each
// against Target. The result collection (v) is currently commented out.
procedure DoFindAll( NodeToSearch : TAST;
// v : Vector;
Target : TAST;
PartialMatch : boolean);
protected
function GetNumberofChildren: integer;
function GetEquals( Node : TAST): boolean; virtual;
function GetEqualsList( Node : TAST): boolean; virtual;
function GetEqualsListPartial( Node : TAST): boolean; virtual;
function GetEqualsTree( Node : TAST): boolean; virtual;
function GetEqualsTreePartial( Node : TAST): boolean; virtual;
// Returns a newly created copy of the token name list.
function GetTokenNames : TStringList;
procedure SetVerbose( Verbose : boolean;
Names : TStringList);
public
// Abstract initialisers; concrete node classes must override them.
procedure Initialize( AstType : integer;
AstText : AnsiString); overload; virtual; abstract;
procedure Initialize( Node : TAST); overload; virtual; abstract;
// procedure Initialize( Token : TToken); overload; virtual; abstract;
// Appends node as the last child; a nil node is ignored.
procedure AddChild( node: TAST);
// Clears the first-child link; the former children are not freed here.
procedure RemoveChildren;
public
procedure AfterConstruction; override;
procedure BeforeDestruction; override;
public
property NumberOfChildren : integer read GetNumberofChildren;
property NextSibling : TAST read fRight write fRight;
property FirstChild : TAST read fDown write fDown;
property AstText : AnsiString read fAstText write fAstText;
property AstType : integer read fAstType write fAstType;
property AstLine : integer read fAstLine write fAstLine;
property AstColumn : integer read fAstColumn write fAstColumn;
// Comparison properties; n is the node/tree to compare against.
property Equals [n: TAST]: boolean read GetEquals;
property EqualsList [n: TAST]: boolean read GetEqualsList;
property EqualsListPartial [n: TAST]: boolean read GetEqualsListPartial;
property EqualsTree [n: TAST]: boolean read GetEqualsTree;
property EqualsTreePartial [n: TAST]: boolean read GetEqualsTreepartial;
end;
TASTClass = class of TAST;
implementation
{ TAST }
procedure TAST.AfterConstruction;
// Runs after the constructor chain: allocates the token name list.
begin
  inherited AfterConstruction;
  fTokenNames := TStringList.Create;
end;
procedure TAST.BeforeDestruction;
// Runs before the destructor chain: releases the token name list.
begin
  fTokenNames.Free;
  inherited BeforeDestruction;
end;
// ================================================================================================
// DoFindAll
// ================================================================================================
procedure TAST.DoFindAll(NodeToSearch, Target: TAST; PartialMatch: boolean);
// Visits NodeToSearch, each of its siblings and, recursively, their children,
// comparing every visited node with Target (partial or full tree comparison).
// NOTE: matches are not collected yet - the result vector is still disabled.
var
  current : TAST;
  matched : boolean;
begin
  current := NodeToSearch;
  while current <> nil do
  begin
    if PartialMatch
      then matched := current.EqualsTreePartial[Target]
      else matched := current.EqualsTree[Target];

    if matched then
      // v.appendelement(current)
      ;

    // descend into the children before moving on to the next sibling
    if current.FirstChild <> nil then
      DoFindAll( current.FirstChild, {v,} Target, PartialMatch);

    current := current.NextSibling;
  end;
end;
// ================================================================================================
// Get Number of Children
// ================================================================================================
function TAST.GetNumberofChildren: integer;
// Counts the direct children by walking the sibling chain of the first child.
var
  child: TAST;
begin
  result := 0;
  child  := fDown;
  while child <> nil do
  begin
    INC(result);
    child := child.fRight;
  end;
end;
// ================================================================================================
// Get Equals
// ================================================================================================
function TAST.GetEquals(Node: TAST): boolean;
// Two nodes are equal when both their type and their text match.
// A nil Node never equals this node.
begin
  result := false;
  if Node = nil then
    exit;
  result := (fAstType = Node.fAstType) and (fAstText = Node.fAstText);
end;
// ================================================================================================
// Get Equals List
// ================================================================================================
function TAST.GetEqualsList(Node: TAST): boolean;
// Full structural comparison of two sibling lists (self and its siblings
// against Node and its siblings), including all descendants.
// Returns false for a nil Node.
var
  mine: TAST;
begin
  result := false;
  if Node = nil then
    exit;

  mine := self;
  while (mine <> nil) and (Node <> nil) do
  begin
    // as a quick optimization, check roots first
    if not mine.Equals[Node] then
      exit;

    if mine.FirstChild <> nil then
    begin
      // roots match: the child lists must match completely as well
      if not mine.FirstChild.EqualsList[Node.FirstChild] then
        exit;
    end
    else if Node.FirstChild <> nil then
      // this node has no kids, so Node must not have any either
      exit;

    mine := mine.NextSibling;
    Node := Node.NextSibling;
  end;

  // equal only if both lists ended at the same time
  result := (mine = nil) and (Node = nil);
end;
// ================================================================================================
// Get Equals List Partial
//
// Is Node a subtree of this list ? The siblings of the root are NOT ignored.
// ================================================================================================
function TAST.GetEqualsListPartial(Node: TAST): boolean;
// Partial comparison of sibling lists: is the list starting at Node a prefix
// pattern of the list starting at self? Children are compared partially too.
//
// - A nil Node (empty pattern) matches anything -> true.
// - The first mismatching pair of nodes          -> false.
// - Pattern longer than this list                -> false.
//
// Fixes over the previous version:
//   * the loop now advances both cursors (it used to spin forever on a match);
//   * a mismatch returns false instead of falling through to "true";
//   * an empty pattern is accepted, which the recursive child comparison needs
//     when the pattern node has no children.
var
  sibling: TAST;
begin
  // the empty tree is a subset of any tree
  result := true;
  if not Assigned(Node) then
    exit;

  result  := false;
  sibling := self;
  while Assigned(sibling) and Assigned(Node) do
  begin
    // as a quick optimization, check roots first
    if not sibling.Equals[Node] then
      exit;

    // if roots match, do partial list match test on children
    // (a pattern node without children places no constraint on them)
    if Assigned(sibling.FirstChild) and Assigned(Node.FirstChild) then
      if not sibling.FirstChild.EqualsListPartial[Node.FirstChild] then
        exit;

    sibling := sibling.NextSibling;
    Node    := Node.NextSibling;
  end;

  // false only when this list ran out while the pattern still has nodes
  result := Assigned(sibling) or not Assigned(Node);
end;
// ================================================================================================
// Get Equals Tree
//
// Is the tree rooted at *self* equal to *Node*?
// The siblings of *self* are ignored.
// ================================================================================================
function TAST.GetEqualsTree(Node: TAST): boolean;
// Full comparison of the tree rooted at self with the tree rooted at Node;
// the siblings of both roots are ignored.
//
// Fix: the previous version returned true whenever the roots were different
// (or Node was nil), because "result := true" was reached unconditionally.
begin
  result := false;

  // roots must match (Equals is false for a nil Node)
  if not Equals[Node] then
    exit;

  if Assigned(FirstChild) then
  begin
    // roots match: do full list match test on children
    if not FirstChild.EqualsList[Node.FirstChild] then
      exit;
  end
  // no kids here, make sure Node has none either
  else if Assigned(Node.FirstChild) then
    exit;

  result := true;
end;
// ================================================================================================
// Get Equals Tree Partial
// ================================================================================================
function TAST.GetEqualsTreePartial(Node: TAST): boolean;
// Partial comparison: is the tree rooted at Node a sub-pattern of the tree
// rooted at self? Siblings of the roots are ignored.
//
// - A nil Node (empty pattern) matches -> true (unchanged behaviour).
// - Different roots                    -> false.
// - Otherwise the children of Node must partially match the children of self.
//
// Fixes over the previous version:
//   * different roots used to yield true;
//   * the child comparison was given Node itself instead of Node.FirstChild.
begin
  // the empty tree is a subset of any tree
  result := true;
  if not Assigned(Node) then
    exit;

  result := false;
  if not Equals[Node] then
    exit;

  // a pattern without children places no constraint on our children
  if Assigned(FirstChild) and Assigned(Node.FirstChild) then
    if not FirstChild.EqualsListPartial[Node.FirstChild] then
      exit;

  result := true;
end;
// ================================================================================================
// Get Token Names
// ================================================================================================
function TAST.GetTokenNames: TStringList;
// Returns a freshly allocated list holding a copy of the token names.
// The list is created here on every call and is not tracked by this object.
var
  namesCopy: TStringList;
begin
  namesCopy := TStringList.Create;
  namesCopy.AddStrings(fTokenNames);
  result := namesCopy;
end;
// ================================================================================================
// Set Verbose
// ================================================================================================
procedure TAST.SetVerbose(Verbose: boolean; Names: TStringList);
// Stores the verbose flag and replaces the token name list with a copy of
// Names. A nil Names simply leaves the list empty (previously this crashed
// inside AddStrings).
begin
  fVerbose := Verbose;
  fTokenNames.Clear;
  if Assigned(Names) then
    fTokenNames.AddStrings(Names);
end;
// ================================================================================================
// AddChild
// ================================================================================================
procedure TAST.AddChild(node: TAST);
// Appends node at the end of the child list; a nil node is ignored.
var
  last: TAST;
begin
  if node = nil then
    exit;

  // no children yet: node becomes the first child
  if fDown = nil then
  begin
    fDown := node;
    exit;
  end;

  // otherwise walk to the last sibling and hook node behind it
  last := fDown;
  while last.fRight <> nil do
    last := last.fRight;
  last.fRight := node;
end;
// ================================================================================================
// Remove Children
// ================================================================================================
procedure TAST.RemoveChildren;
// Detaches all children by clearing the first-child link.
// The detached nodes are not freed by this method.
begin
  FirstChild := nil;
end;
end.
@@ -0,0 +1,12 @@
unit astProgram;
interface
type
// Placeholder class: declares no members yet.
TastProgram = class
end;
implementation
end.
Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

+38
View File
@@ -0,0 +1,38 @@
program wpTest;
{$APPTYPE CONSOLE}
uses
Classes,
SysUtils,
wpLex in '..\wpLex.pas',
wpPar in '..\wpPar.pas',
astProgram in '..\..\tools\ast\astProgram.pas',
ast in '..\..\tools\ast\ast.pas';
var
stm : TFileStream;
lex : TwpLex;
par : TwpPar;
begin
  // Start from nil so the finally block can free whatever was created,
  // even when one of the constructors or the parse itself raises.
  stm := nil;
  lex := nil;
  par := nil;
  try
    try
      stm := TFileStream.Create('x.pas', fmOpenRead);
      stm.Seek(0, soFromBeginning);
      lex := TwpLex.Create(stm);
      par := TwpPar.Create(lex);
      par.prg;
    finally
      // previously these were only released on the success path
      par.Free;
      lex.Free;
      stm.Free;
    end;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.
+107
View File
@@ -0,0 +1,107 @@
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<ProjectGuid>{E509B25A-366D-449B-B4C6-013162829AAF}</ProjectGuid>
<ProjectVersion>12.0</ProjectVersion>
<MainSource>wpTest.dpr</MainSource>
<Config Condition="'$(Config)'==''">Debug</Config>
<DCC_DCCCompiler>DCC32</DCC_DCCCompiler>
</PropertyGroup>
<PropertyGroup Condition="'$(Config)'=='Base' or '$(Base)'!=''">
<Base>true</Base>
</PropertyGroup>
<PropertyGroup Condition="'$(Config)'=='Release' or '$(Cfg_1)'!=''">
<Cfg_1>true</Cfg_1>
<CfgParent>Base</CfgParent>
<Base>true</Base>
</PropertyGroup>
<PropertyGroup Condition="'$(Config)'=='Debug' or '$(Cfg_2)'!=''">
<Cfg_2>true</Cfg_2>
<CfgParent>Base</CfgParent>
<Base>true</Base>
</PropertyGroup>
<PropertyGroup Condition="'$(Base)'!=''">
<DCC_DependencyCheckOutputName>wpTest.exe</DCC_DependencyCheckOutputName>
<DCC_ImageBase>00400000</DCC_ImageBase>
<DCC_UnitAlias>WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;$(DCC_UnitAlias)</DCC_UnitAlias>
<DCC_Platform>x86</DCC_Platform>
<DCC_E>false</DCC_E>
<DCC_N>false</DCC_N>
<DCC_S>false</DCC_S>
<DCC_F>false</DCC_F>
<DCC_K>false</DCC_K>
</PropertyGroup>
<PropertyGroup Condition="'$(Cfg_1)'!=''">
<DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
<DCC_Define>RELEASE;$(DCC_Define)</DCC_Define>
<DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
<DCC_DebugInformation>false</DCC_DebugInformation>
</PropertyGroup>
<PropertyGroup Condition="'$(Cfg_2)'!=''">
<DCC_Define>DEBUG;$(DCC_Define)</DCC_Define>
</PropertyGroup>
<ItemGroup>
<DelphiCompile Include="wpTest.dpr">
<MainSource>MainSource</MainSource>
</DelphiCompile>
<DCCReference Include="..\wpLex.pas"/>
<DCCReference Include="..\wpPar.pas"/>
<DCCReference Include="..\..\tools\ast\astProgram.pas"/>
<DCCReference Include="..\..\tools\ast\ast.pas"/>
<BuildConfiguration Include="Base">
<Key>Base</Key>
</BuildConfiguration>
<BuildConfiguration Include="Debug">
<Key>Cfg_2</Key>
<CfgParent>Base</CfgParent>
</BuildConfiguration>
<BuildConfiguration Include="Release">
<Key>Cfg_1</Key>
<CfgParent>Base</CfgParent>
</BuildConfiguration>
</ItemGroup>
<Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/>
<ProjectExtensions>
<Borland.Personality>Delphi.Personality.12</Borland.Personality>
<Borland.ProjectType/>
<BorlandProject>
<Delphi.Personality>
<Source>
<Source Name="MainSource">wpTest.dpr</Source>
</Source>
<Parameters>
<Parameters Name="UseLauncher">False</Parameters>
<Parameters Name="LoadAllSymbols">True</Parameters>
<Parameters Name="LoadUnspecifiedSymbols">False</Parameters>
</Parameters>
<VersionInfo>
<VersionInfo Name="IncludeVerInfo">False</VersionInfo>
<VersionInfo Name="AutoIncBuild">False</VersionInfo>
<VersionInfo Name="MajorVer">1</VersionInfo>
<VersionInfo Name="MinorVer">0</VersionInfo>
<VersionInfo Name="Release">0</VersionInfo>
<VersionInfo Name="Build">0</VersionInfo>
<VersionInfo Name="Debug">False</VersionInfo>
<VersionInfo Name="PreRelease">False</VersionInfo>
<VersionInfo Name="Special">False</VersionInfo>
<VersionInfo Name="Private">False</VersionInfo>
<VersionInfo Name="DLL">False</VersionInfo>
<VersionInfo Name="Locale">1031</VersionInfo>
<VersionInfo Name="CodePage">1252</VersionInfo>
</VersionInfo>
<VersionInfoKeys>
<VersionInfoKeys Name="CompanyName"/>
<VersionInfoKeys Name="FileDescription"/>
<VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys>
<VersionInfoKeys Name="InternalName"/>
<VersionInfoKeys Name="LegalCopyright"/>
<VersionInfoKeys Name="LegalTrademarks"/>
<VersionInfoKeys Name="OriginalFilename"/>
<VersionInfoKeys Name="ProductName"/>
<VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys>
<VersionInfoKeys Name="Comments"/>
</VersionInfoKeys>
</Delphi.Personality>
</BorlandProject>
<ProjectFileVersion>12</ProjectFileVersion>
</ProjectExtensions>
</Project>
+13
View File
@@ -0,0 +1,13 @@
program prg1 (input,output);
label 1,4,5;
const
a = 3.14;
b = 'hello';
c = d;
asd = 12345;
begin
end.
Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

+607
View File
@@ -0,0 +1,607 @@
unit wpLex;
interface
uses
Classes,
SysUtils,
Generics.Collections;
type
// All token kinds produced by TwpLex. TT_* are structural tokens, LT_* are the
// reserved words registered in TwpLex.InitLiterals (same order as below).
TTokenType =
(
TT_EOF,
TT_SKIP,     // whitespace; consumed inside NextToken, never returned
TT_COMMENT,
TT_LPAREN, // (
TT_RPAREN, // )
TT_LBRACKET, // [
TT_RBRACKET, // ]
TT_STAR, // *
TT_SLASH, // /
TT_PLUS, // +
TT_MINUS, // -
TT_LT, // <
TT_LE, // <=
TT_GT, // >
TT_GE, // >=
TT_EQ, // =
TT_NE, // <>
TT_COLON, // :
TT_ASSIGN, // :=
TT_DOT, // .
TT_RANGE, // ..
TT_PTR, // ^
TT_COMMA, // ,
TT_SEMI, // ;
TT_DOLLAR, // $
TT_AT, // @
TT_SHARP, // #
TT_ID,
TT_UINT,
TT_UREAL,
TT_CHAR,     // NextToken assigns this type to every quoted literal
TT_STRING,   // NOTE(review): not assigned anywhere in NextToken
TT_HEX,
TT_BIN,
LT_DO,
LT_IF,
LT_IN,
LT_OF,
LT_OR,
LT_TO,
LT_AND,
LT_DIV,
LT_END,
LT_FOR,
LT_MOD,
LT_NIL,
LT_NOT,
LT_SET,
LT_VAR,
LT_XOR,
LT_CASE,
LT_ELSE,
LT_FILE,
LT_GOTO,
LT_THEN,
LT_TYPE,
LT_USES,
LT_WITH,
LT_ARRAY,
LT_BEGIN,
LT_CONST,
LT_LABEL,
LT_UNTIL,
LT_WHILE,
LT_DOWNTO,
LT_PACKED,
LT_RECORD,
LT_REPEAT,
LT_PROGRAM,
LT_FUNCTION,
LT_PROCEDURE
);
TTokenTypes = set of TTokenType;
TBlah = set of byte;
// One lexical token. Instances are created by TwpLex.MakeToken; the parser
// code frees the tokens it receives.
TToken = class
TokenType : TTokenType;
TokenText : AnsiString;  // exact source text of the token
TokenLine : integer;     // set to 0 by MakeToken
TokenColumn : integer;   // set to 0 by MakeToken
end;
TTokenMap = TDictionary<AnsiString,TTokenType>;
// Hand-written lexer. The constructor copies the stream content into a
// #0-terminated buffer; NextToken then scans that buffer.
TwpLex = class
private
fBuffer : PAnsiChar;   // start of the #0-terminated input buffer
fStart : PAnsiChar;    // first character of the token being scanned
fForward : PAnsiChar;  // scan position (one past the last accepted character)
fLiterals : TTokenMap; // reserved word -> token type
fTokenLine : integer;
fTokenColumn: integer;
private
procedure InitLiterals;
// Returns the keyword type for TokenText, or TokenType if it is no keyword.
function CheckLiteral( TokenText : AnsiString;
TokenType : TTokenType): TTokenType;
function MakeToken( TokenText : AnsiString;
TokenType : TTokenType): TToken;
public
// Returns the next non-whitespace token as a newly created TToken.
function NextToken : TToken;
public
// Length < 0 means: read from the current stream position to the end.
constructor Create( Stream: TStream; Length: Int64=-1);
destructor Destroy; override;
end;
EwpLex = Exception;
implementation
uses
Windows;
{ TwpLex }
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction/destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
// ================================================================================================
constructor TwpLex.Create(Stream: TStream; Length: Int64);
// Builds the keyword table and loads the input into a #0-terminated buffer.
//
//   Stream - source of the text; when nil no buffer is set up.
//   Length - number of bytes to read; a negative value reads everything from
//            the current stream position to the end of the stream.
//
// Fix: the terminator is now written directly after the bytes that were
// actually read. Before, a short read (Length larger than the remaining
// stream) left uninitialised memory in front of the terminator and the lexer
// scanned it as input.
var
  size   : Int64;
  loaded : Int64;
begin
  inherited Create;
  InitLiterals;
  if Assigned(Stream) then
  begin
    if Length < 0
      then size := Stream.Size - Stream.Position
      else size := Length;
    fBuffer := GetMemory(size+1);
    loaded  := Stream.Read( fBuffer^, size);
    if loaded < 0 then
      loaded := 0;
    fBuffer[loaded] := #0;
    fStart   := fBuffer;
    fForward := fBuffer;
  end;
end;
// ================================================================================================
// Destructor
// ================================================================================================
destructor TwpLex.Destroy;
// Releases the keyword table and the input buffer.
// Fix: the buffer allocated with GetMemory in the constructor was never freed.
begin
  FreeAndNil(fLiterals);
  if Assigned(fBuffer) then
  begin
    FreeMemory(fBuffer);
    fBuffer  := nil;
    fStart   := nil;
    fForward := nil;
  end;
  inherited;
end;
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Make Token
// ================================================================================================
function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken;
// Creates a token object carrying the given type and text.
// Line and column are not tracked yet and are always stored as 0.
var
  tok: TToken;
begin
  tok := TToken.Create;
  tok.TokenType   := TokenType;
  tok.TokenText   := TokenText;   // text is stored as written (no case folding)
  tok.TokenLine   := 0;
  tok.TokenColumn := 0;
  result := tok;
end;
// ================================================================================================
// Init Literals
// ================================================================================================
procedure TwpLex.InitLiterals;
// Creates the keyword dictionary and registers every reserved word together
// with its token type.
const
  // Reserved words in the declaration order of LT_DO .. LT_PROCEDURE.
  KEYWORDS: array[LT_DO..LT_PROCEDURE] of AnsiString =
  (
    'do', 'if', 'in', 'of', 'or', 'to',
    'and', 'div', 'end', 'for', 'mod', 'nil', 'not', 'set', 'var', 'xor',
    'case', 'else', 'file', 'goto', 'then', 'type', 'uses', 'with',
    'array', 'begin', 'const', 'label', 'until', 'while',
    'downto', 'packed', 'record', 'repeat',
    'program', 'function', 'procedure'
  );
var
  kw: TTokenType;
begin
  fLiterals := TTokenMap.Create;
  for kw := Low(KEYWORDS) to High(KEYWORDS) do
    fLiterals.Add(KEYWORDS[kw], kw);
end;
// ================================================================================================
// Check Literal
// ================================================================================================
function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType;
// Returns the reserved-word token type for TokenText, or TokenType when the
// text is not a reserved word.
//
// Fix: the lookup is now case-insensitive. The table holds lower-case words
// only, so "BEGIN" or "Program" used to be classified as identifiers.
// The text reaching this function consists of ASCII letters, digits and '_'
// (see the identifier branch of NextToken), so the string conversion is
// lossless.
var
  ttype : TTokenType;
  key   : AnsiString;
begin
  key := AnsiString(LowerCase(string(TokenText)));
  if fLiterals.TryGetValue(key, ttype)
    then result := ttype
    else result := TokenType
end;
// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Interface
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Next Token
// ================================================================================================
function TwpLex.NextToken: TToken;
// Scans the next token from the input buffer and returns it as a new TToken.
// Whitespace (TT_SKIP) is consumed silently; comments ARE returned as
// TT_COMMENT tokens. At the end of the input a TT_EOF token with empty text is
// returned (repeatedly, if called again).
//
// Raises EwpLex on malformed input (unterminated string/comment, bad number,
// unknown character).
//
// Fixes over the previous version:
//   * "(* ... *)" comments: the scan loop did not advance on ordinary
//     characters and therefore never terminated;
//   * "1..5": the first dot was swallowed and the token "1." was typed
//     TT_RANGE; the integer now ends before the dots so that the next call
//     yields TT_RANGE;
//   * the token text is copied with SetString, which is also valid for the
//     empty EOF token (no @result[1] on an empty string).
// The former inner "case" has been merged into the outer one; its '$' label
// was unreachable because '$' is handled by the hex-number branch.

  // Text between fStart (inclusive) and fForward (exclusive).
  function GetTokenText: AnsiString;
  begin
    SetString(result, fStart, fForward-fStart);
  end;

var
  ttext : AnsiString;
  ttype : TTokenType;
begin
  result := nil;
  while true do
  begin
    result   := nil;
    fForward := fStart;
    case fForward^ of
      // id or reserved word
      'a'..'z','A'..'Z','_':
        begin
          INC(fForward);
          ttype := TT_ID;
          while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do
            INC(fForward);
          ttype := CheckLiteral( GetTokenText, ttype);
        end;

      // uint or ureal
      '0'..'9':
        begin
          INC(fForward);
          ttype := TT_UINT;
          while fForward^ in ['0'..'9'] do
            INC(fForward);

          // fractional part; fForward[1] is safe to read because fForward^
          // is '.', i.e. not the terminating #0
          if fForward^ = '.' then
          begin
            if fForward[1] in ['0'..'9'] then
            begin
              INC(fForward, 2);
              ttype := TT_UREAL;
              while fForward^ in ['0'..'9'] do
                INC(fForward);
            end
            else if fForward[1] <> '.' then
              raise EwpLex.Create('Expected 0..9 in fractional part');
            // otherwise "n..": leave both dots for the next call (TT_RANGE)
          end;

          // exponential part
          if fForward^ in ['e','E'] then
          begin
            INC(fForward);
            ttype := TT_UREAL;
            if fForward^ in ['+','-'] then
              INC(fForward);
            if fForward^ in ['0'..'9'] then
            begin
              INC(fForward);
              while fForward^ in ['0'..'9'] do
                INC(fForward);
            end
            else
              raise EwpLex.Create('Expected +,-,0..9 in exponential part');
          end;
        end;

      // <,<=,<>
      '<':
        begin
          INC(fForward);
          ttype := TT_LT;
          if fForward^ in ['=','>'] then
          begin
            case fForward^ of
              '=': ttype := TT_LE;
              '>': ttype := TT_NE;
            end;
            INC(fForward);
          end;
        end;

      // >,>=
      '>':
        begin
          INC(fForward);
          ttype := TT_GT;
          if fForward^ = '=' then
          begin
            INC(fForward);
            ttype := TT_GE;
          end;
        end;

      // :,:=
      ':':
        begin
          INC(fForward);
          ttype := TT_COLON;
          if fForward^ = '=' then
          begin
            INC(fForward);
            ttype := TT_ASSIGN
          end
        end;

      // .,..
      '.':
        begin
          INC(fForward);
          ttype := TT_DOT;
          if fForward^ = '.' then
          begin
            INC(fForward);
            ttype := TT_RANGE;
          end
        end;

      // quoted literal; a doubled quote inside stands for one quote character.
      // NOTE(review): always typed TT_CHAR, TT_STRING is never produced.
      '''':
        begin
          INC(fForward);
          ttype := TT_CHAR;
          while true do
          begin
            if fForward^ in [#10,#13,#0] then
              raise EwpLex.Create('Newline/EOF found in string');
            if fForward^ = '''' then
            begin
              INC(fForward);
              if fForward^ = ''''
                then INC(fForward)
                else break
            end
            else
              INC(fForward)
          end;
        end;

      // /,// (line comment runs up to, not including, the line end)
      '/':
        begin
          INC(fForward);
          ttype := TT_SLASH;
          if fForward^ = '/' then
          begin
            INC(fForward);
            ttype := TT_COMMENT;
            while not (fForward^ in [#13,#10,#0]) do
              INC(fForward);
          end
        end;

      // { comment }
      '{':
        begin
          INC(fForward);
          ttype := TT_COMMENT;
          while not (fForward^ in ['}',#0]) do
            INC(fForward);
          if fForward^ = #0
            then raise EwpLex.Create('EOF reached in comment')
            else INC( fForward);
        end;

      // ( or (* comment *)
      '(':
        begin
          INC(fForward);
          ttype := TT_LPAREN;
          if fForward^ = '*' then
          begin
            INC(fForward);
            ttype := TT_COMMENT;
            while true do
            begin
              if fForward^ = #0 then
                raise EwpLex.Create('EOF reached in comment');
              if fForward^ = '*' then
              begin
                INC(fForward);
                if fForward^ = ')' then
                begin
                  INC(fForward);
                  break;
                end;
              end
              else
                // ordinary comment character: skip it
                INC(fForward);
            end
          end
        end;

      // hex number
      '$':
        begin
          INC(fForward);
          if fForward^ in ['0'..'9','a'..'f','A'..'F'] then
          begin
            INC(fForward);
            ttype := TT_HEX;
            while fForward^ in ['0'..'9','a'..'f','A'..'F'] do
              INC(fForward);
          end
          else
            raise EwpLex.Create('Expected hexadecimal digit');
        end;

      // binary number
      '%':
        begin
          INC(fForward);
          if fForward^ in ['0'..'1'] then
          begin
            INC(fForward);
            ttype := TT_BIN;
            while fForward^ in ['0'..'1'] do
              INC(fForward);
          end
          else
            raise EwpLex.Create('Expected binary digit');
        end;

      // single character tokens
      ')': begin ttype := TT_RPAREN;   INC(fForward) end;
      '[': begin ttype := TT_LBRACKET; INC(fForward) end;
      ']': begin ttype := TT_RBRACKET; INC(fForward) end;
      '*': begin ttype := TT_STAR;     INC(fForward) end;
      '+': begin ttype := TT_PLUS;     INC(fForward) end;
      '-': begin ttype := TT_MINUS;    INC(fForward) end;
      '=': begin ttype := TT_EQ;       INC(fForward) end;
      '^': begin ttype := TT_PTR;      INC(fForward) end;
      ';': begin ttype := TT_SEMI;     INC(fForward) end;
      ',': begin ttype := TT_COMMA;    INC(fForward) end;
      '@': begin ttype := TT_AT;       INC(fForward) end;
      '#': begin ttype := TT_SHARP;    INC(fForward) end;

      // whitespace
      #9 : begin ttype := TT_SKIP;     INC(fForward) end;
      #10: begin ttype := TT_SKIP;     INC(fForward) end;
      #13: begin ttype := TT_SKIP;     INC(fForward) end;
      #32: begin ttype := TT_SKIP;     INC(fForward) end;

      // EOF: fForward is not advanced, so every further call yields EOF again
      #0 : ttype := TT_EOF;
    else
      raise EwpLex.Create('Invalid character '+fForward^);
    end;

    if ttype <> TT_SKIP then
    begin
      ttext  := GetTokenText;
      result := MakeToken( ttext, ttype);
      fStart := fForward;
      break;
    end;

    // skipped whitespace: continue scanning behind it
    fStart := fForward;
  end;
end;
end.
+418
View File
@@ -0,0 +1,418 @@
unit wpPar;
interface
uses
SysUtils,
Generics.Collections,
wpLex;
type
TStringMap = TDictionary<AnsiString,AnsiString>;
// Recursive-descent parser on top of TwpLex. The lexer is passed in by the
// caller and is not freed by this class (see Destroy).
TwpPar = class
private
fLex : TwpLex;
fConstants : TStringMap;
fTypes : TStringMap;
protected
// Reads the next token and raises EwpPar unless its type matches.
// dispose=true frees the token and returns nil; dispose=false returns it.
function Match( ttype : TTokenType; dispose: boolean=true):TToken; overload;
function Match( ttypes : TTokenTypes; dispose: boolean=true):TToken; overload;
protected
procedure block;
procedure constant;
procedure uconstant;
procedure typedef;
public
// Entry point: parses a complete program.
procedure prg;
public
constructor Create( Lexer: TwpLex);
destructor Destroy; override;
end;
EwpPar = Exception;
implementation
{ TwpPar }
// @@@: Construction / destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction / destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
// ================================================================================================
constructor TwpPar.Create(Lexer: TwpLex);
// Remembers the lexer to pull tokens from and creates the (initially empty)
// constant and type maps.
begin
  inherited Create;
  fConstants := TStringMap.Create;
  fTypes     := TStringMap.Create;
  fLex       := Lexer;
end;
// ================================================================================================
// Destructor
// ================================================================================================
destructor TwpPar.Destroy;
// Frees the two maps created by the constructor; the lexer is left untouched.
begin
  fTypes.Free;
  fConstants.Free;
  inherited Destroy;
end;
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Match (single token type)
// ================================================================================================
function TwpPar.Match(ttype: TTokenType; dispose: boolean): TToken;
// Reads the next token and checks that it has the type ttype.
//
//   dispose = true  : the token is freed, the result is nil;
//   dispose = false : the token is returned and must be freed by the caller.
//
// Raises EwpPar('Unexpected token') on a mismatch.
// Fix: the mismatching token is now freed before raising (it used to leak).
var
  t: TToken;
begin
  result := nil;
  t := fLex.NextToken;
  if t.TokenType <> ttype then
  begin
    t.Free;
    raise EwpPar.Create('Unexpected token');
  end;
  if dispose
    then t.Free
    else result := t;
end;
// ================================================================================================
// Match (set of token types)
// ================================================================================================
function TwpPar.Match(ttypes: TTokenTypes; dispose: boolean): TToken;
// Reads the next token and checks that its type is a member of ttypes.
//
//   dispose = true  : the token is freed, the result is nil;
//   dispose = false : the token is returned and must be freed by the caller.
//
// Raises EwpPar('Unexpected token') on a mismatch.
// Fix: the mismatching token is now freed before raising (it used to leak).
var
  t: TToken;
begin
  result := nil;
  t := fLex.NextToken;
  if not (t.TokenType in ttypes) then
  begin
    t.Free;
    raise EwpPar.Create('Unexpected token');
  end;
  if dispose
    then t.Free
    else result := t;
end;
// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Interface
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Program
// ================================================================================================
procedure TwpPar.prg;
// program  ::=  PROGRAM id '(' [ id { ',' id } ] ')' ';' block '.'
//
// Does nothing when no lexer was supplied. Raises EwpPar on a syntax error.
// Fix: every look-ahead token is freed right after its type has been read;
// previously the token following the identifier list was never released.

  // Fetches the next token, releases it and returns only its type.
  function NextType: TTokenType;
  var
    t: TToken;
  begin
    t := fLex.NextToken;
    try
      result := t.TokenType;
    finally
      t.Free;
    end;
  end;

var
  ttype : TTokenType;
begin
  if not Assigned( fLex) then
    exit;

  Match(LT_PROGRAM);
  Match(TT_ID);
  Match(TT_LPAREN);

  // id [, id]*
  ttype := NextType;
  if ttype = TT_ID then
  begin
    ttype := NextType;
    while ttype = TT_COMMA do
    begin
      Match(TT_ID);
      ttype := NextType;
    end;
  end;

  if ttype <> TT_RPAREN then
    raise EwpPar.Create('")" expected');

  Match(TT_SEMI);
  block;
  Match(TT_DOT)
end;
// ================================================================================================
// block
// ================================================================================================
procedure TwpPar.block;
// block ::= { label-section | const-section | type-section } BEGIN END
//
//   label-section ::= LABEL uint { ',' uint } ';'
//   const-section ::= CONST { id '=' constant ';' }
//   type-section  ::= TYPE  { id '=' typedef  ';' }
//
// Raises EwpPar on a syntax error.
//
// Fixes over the previous version:
//   * VAR / PROCEDURE / FUNCTION sections had empty handlers that never
//     fetched a new token, so the section loop ran forever; they now raise
//     an EwpPar because they are not implemented;
//   * section keyword tokens and the BEGIN token were never freed, and the
//     current token leaked whenever an exception was raised.
var
  token   : TToken;       // current look-ahead; nil while none is held
  section : TTokenType;
  ttype   : TTokenType;
  start   : TTokenTypes;
  start2  : TTokenTypes;
begin
  start := [LT_LABEL,LT_CONST,LT_TYPE,LT_VAR,LT_PROCEDURE,LT_FUNCTION,LT_BEGIN];
  start2:= [LT_LABEL,LT_CONST,LT_TYPE,LT_VAR,LT_PROCEDURE,LT_FUNCTION];

  token := fLex.NextToken;
  try
    if not (token.TokenType in start) then
      raise EwpPar.Create('Expected: label, const, type, var, procedure, function, begin');

    while token.TokenType in start2 do
    begin
      // the section keyword itself is no longer needed
      section := token.TokenType;
      FreeAndNil(token);

      case section of
        // ------------------------------------------------------
        // Label
        // ------------------------------------------------------
        LT_LABEL:
          while true do
          begin
            Match( TT_UINT);
            token := fLex.NextToken;
            ttype := token.TokenType;
            FreeAndNil(token);
            case ttype of
              TT_COMMA : ;
              // end of the section: fetch the look-ahead for the outer loop
              TT_SEMI  : begin token := fLex.NextToken; break end;
              else raise EwpPar.Create('Expected: ,;');
            end;
          end;

        // ------------------------------------------------------
        // Const
        // ------------------------------------------------------
        LT_CONST:
          begin
            token := fLex.NextToken;
            while token.TokenType = TT_ID do
            begin
              FreeAndNil(token);
              Match(TT_EQ);
              constant;
              Match(TT_SEMI);
              token := fLex.NextToken;
            end;
          end;

        // ------------------------------------------------------
        // Type
        // ------------------------------------------------------
        LT_TYPE:
          begin
            token := fLex.NextToken;
            while token.TokenType = TT_ID do
            begin
              FreeAndNil(token);
              Match(TT_EQ);
              typedef;
              Match(TT_SEMI);
              token := fLex.NextToken;
            end;
          end;

        // ------------------------------------------------------
        // Not implemented yet
        // ------------------------------------------------------
        LT_VAR, LT_PROCEDURE, LT_FUNCTION:
          raise EwpPar.Create('var, procedure and function sections are not supported yet');
      end;
    end;

    if token.TokenType <> LT_BEGIN then
      raise EwpPar.Create('Expected: Begin');
    Match(LT_END);
  finally
    // releases the pending look-ahead (Free is a no-op on nil)
    token.Free;
  end;
end;
// ================================================================================================
// Constant
// ================================================================================================
// Parses a constant:
//   [ "+" | "-" ] ( id | uint | ureal )   or   char | string
//
// Only the token types are inspected, so each token is released as soon as
// its type has been read; this also covers the error paths, where the token
// used to be leaked.
//
// Raises EwpPar when the token sequence is not a constant.
procedure TwpPar.constant;
var
  token: TToken;
  ttype: TTokenType;
begin
  token := fLex.NextToken;
  ttype := token.TokenType;
  token.Free;

  if ttype in [TT_PLUS,TT_MINUS,TT_ID,TT_UINT,TT_UREAL] then
  begin
    // An optional sign must be followed by an identifier or a number.
    if ttype in [TT_PLUS,TT_MINUS] then
    begin
      token := fLex.NextToken;
      ttype := token.TokenType;
      token.Free;
    end;
    if not (ttype in [TT_ID,TT_UINT,TT_UREAL]) then
      raise EwpPar.Create('Expected: id,int,real');
  end
  else if not (ttype in [TT_CHAR, TT_STRING]) then
    raise EwpPar.Create('Expected: +,-,id,int,real,string');
end;
// ================================================================================================
// Unsigned constant
// ================================================================================================
// Parses an unsigned constant: id | uint | ureal | nil | string.
//
// The token type is copied into ttype before the token is released; the
// check below must use that copy, because the token object no longer exists
// once Free has been called.
//
// Raises EwpPar when the next token is none of the accepted kinds.
procedure TwpPar.uconstant;
var
  token: TToken;
  ttype: TTokenType;
begin
  token := fLex.NextToken;
  ttype := token.TokenType;
  token.Free;
  case ttype of
    TT_ID    : ;
    TT_UINT  : ;
    TT_UREAL : ;
    LT_NIL   : ;
    TT_STRING: ;
    else raise EwpPar.Create('Expected: id,int,real,nil,string')
  end;
end;
// ================================================================================================
// Type
// ================================================================================================
// Parses a type definition. Only the identifier-led forms are handled here;
// any other leading token is read and then ignored.
//
// Raises EwpPar when the leading identifier is neither a known constant nor
// a known type.
procedure TwpPar.typedef;
var
  token: TToken;
  ttype: TTokenType;
  ttext: AnsiString;
begin
  token := fLex.NextToken;
  ttype := token.TokenType;
  ttext := token.TokenText;
  token.Free;
  // ---------------------------------------------------------------
  // TT_ID
  //
  // The identifier can be an existing type identifier or an existing
  // constant identifier. If it is a constant identifier, it must be
  // the start of a range specification.
  // ---------------------------------------------------------------
  if ttype = TT_ID then
  begin
    // --------------------------------------------------
    // constant .. constant
    // --------------------------------------------------
    if fConstants.ContainsKey(ttext) then
    begin
      Match(TT_RANGE);
      // NOTE(review): the upper bound is read but not validated in any
      // way yet; presumably it should be checked like the lower bound.
      token := fLex.NextToken;
      ttype := token.TokenType;
      ttext := token.TokenText;
      token.Free;
    end
    // --------------------------------------------------
    // type identifier
    // --------------------------------------------------
    else if fTypes.ContainsKey(ttext) then
    begin
    end
    // --------------------------------------------------
    // Neither a constant nor a type identifier. The exception has to
    // be raised; constructing it alone reports nothing and leaks it.
    // --------------------------------------------------
    else
      raise EwpPar.Create('Expected a type or constant identifier' );
  end;
end;
end.
+42
View File
@@ -0,0 +1,42 @@
// Command-line driver: parses the file named by the single argument with
// TwpLexer/TwpParser and reports the line of the first syntax error.
program wp;
{$APPTYPE CONSOLE}
uses
  Classes,
  SysUtils,
  dpgRTL,
  wpParser in 'wpParser.pas',
  wpLexer in 'wpLexer.pas';
var
  stm: TFileStream;
  lex: TwpLexer;
  par: TwpParser;
begin
  if ParamCount <> 1 then
  begin
    writeln('usage: wp <filename>');
    exit;
  end;
  stm := nil;
  par := nil;
  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      lex := TwpLexer.Create( stm);
      par := TwpParser.Create(lex);
      par.prog;
    except
      // A file that cannot be opened is not a syntax error; say so.
      on e: EFOpenError do writeln('Cannot open file: ' + e.Message);
      on e: EdpgMismatchedChar do writeln('SyntaxError: ' + IntToStr(e.Line));
      on e: EdpgMismatchedToken do writeln('SyntaxError: ' + IntToStr(e.FoundToken.TokenLine));
      else writeln('Syntax error');
    end;
  finally
    // Free is nil-safe, so no explicit nil checks are needed.
    // NOTE(review): lex is not freed here; presumably the parser owns it.
    stm.Free;
    par.Free;
  end;
end.
+237
View File
@@ -0,0 +1,237 @@
// Lexer grammar for the wp language: generates class TwpLexer in unit wpLexer.
unit wpLexer;
lexer TwpLexer;
options
{
exportVocab=wpLexer;
caseSensitive=false;
// Literal testing is off by default; the ID rule turns it on for itself.
testLiterals=false;
k=2;
}
// Keyword literals (listed by increasing length) plus the STRING and CHAR
// token types, which have no rule of their own: rule STRING_OR_CHAR assigns
// TT_STRING / TT_CHAR through _ttype.
tokens
{
"do";
"if";
"in";
"of";
"or";
"to";
"and";
"div";
"end";
"for";
"mod";
"nil";
"not";
"set";
"var";
"xor";
"case";
"else";
"file";
"goto";
"then";
"type";
"uses";
"with";
"array";
"begin";
"const";
"label";
"until";
"while";
"downto";
"packed";
"record";
"repeat";
"program";
"function";
"procedure";
STRING;
CHAR;
}
// ============================================================================
// Simple tokens
// ============================================================================
// Punctuation and operators. Several two-character tokens start with the
// same character as a one-character token (DOT/RANGE, COLON/ASSIGN, GT/GE,
// LT/LE/NE); the grammar options set k=2.
LPAREN : '(';
RPAREN : ')';
LBRACKET : '[';
RBRACKET : ']';
COMMA : ',';
COLON : ':';
SEMI : ';';
DOT : '.';
RANGE : "..";
ASSIGN : ":=";
EQ : '=';
GT : '>';
LT : '<';
GE : ">=";
LE : "<=";
NE : "<>";
PLUS : '+';
MINUS : '-';
STAR : '*';
SLASH : '/';
PTR : '^';
// ============================================================================
// Identifier
// ============================================================================
// An identifier is a LETTER followed by any number of LETTERs or DIGITs.
// testLiterals is enabled for this rule only (it is off in the grammar
// options) - presumably so that the matched text is looked up in the
// tokens{} keyword list; confirm against the DPG documentation.
ID
options
{
testLiterals=true;
}
: LETTER (LETTER | DIGIT)* ;
// ============================================================================
// Int or real
// ============================================================================
// Numeric literal. Syntactic predicates choose the alternative:
//   UINT followed by RANGE    -> integer (keeps "1..5" from being read as a real)
//   UINT followed by DOT      -> real
//   UINT followed by 'E'/'e'  -> real
//   otherwise                 -> integer
// The token type is set explicitly through _ttype in every alternative.
UINT_OR_REAL
: (UINT RANGE) => UINT { _ttype := TT_UINT; }
| (UINT DOT) => UREAL { _ttype := TT_UREAL; }
| (UINT ('E'|'e')) => UREAL { _ttype := TT_UREAL; }
| UINT { _ttype := TT_UINT; }
;
// ============================================================================
// Protected rules
// ============================================================================
// Protected helper rules; they are referenced by ID, UINT_OR_REAL and by
// each other.
// LETTER: an ASCII letter of either case, or an underscore.
protected
LETTER : 'a'..'z' | 'A'..'Z' | '_';
// ============================================================================
// Int
// ============================================================================
// UINT: one or more decimal digits.
protected
UINT
: (DIGIT)+
;
// ============================================================================
// Real
// ============================================================================
// UREAL: digits, an optional fraction ('.' digits) and an optional exponent
// ('e' or 'E', optional sign, digits). Both optional parts may be absent.
protected
UREAL
: UINT ('.' UINT)? ( ('e' | 'E') ('+'|'-')? UINT)?
;
// ============================================================================
// Digit
// ============================================================================
// DIGIT: a single decimal digit.
protected
DIGIT
: '0'..'9'
;
// ============================================================================
// String or char
// ============================================================================
// Quoted literal: an opening quote, then any mix of non-quote characters and
// doubled quotes, then a closing quote. The action classifies the token from
// its full text (quotes included):
//   exactly two quotes  (empty literal)      -> TT_STRING
//   exactly four quotes (one escaped quote)  -> TT_CHAR
//   any other text longer than 3 characters  -> TT_STRING
//   everything else (one character quoted)   -> TT_CHAR
STRING_OR_CHAR
: '\'' (~'\'' | '\'' '\'')* '\''
{
if TokenText = '''''' then _ttype := TT_STRING
else if TokenText = '''''''''' then _ttype := TT_CHAR
else if Length( TokenText) > 3 then _ttype := TT_STRING
else _ttype := TT_CHAR;
}
;
// ============================================================================
// Single line comment
// ============================================================================
// "//" up to and including the line terminator (CR LF, CR or LF); newLine is
// called for the terminator and the token is skipped.
// NOTE(review): the terminator is mandatory, so a comment on the last line of
// a file that lacks a trailing newline would not match - confirm whether that
// is intended.
SLCOMMENT
:
"//"
( ~( '\r' | '\n') )*
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
)
{
_ttype := TT_SKIP;
}
;
// ============================================================================
// Multi line comment version 1
// Nested comments aren't allowed!
// ============================================================================
// "(*" ... "*)" comment. The loop is non-greedy, so it ends at the first
// "*)"; line terminators inside the comment call newLine. The token is
// skipped.
MLCOMMENT1
:
"(*"
(
options
{
greedy = false;
generateAmbigWarnings = false;
}
: '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| .
)*
"*)"
{
_ttype := TT_SKIP;
}
;
// ============================================================================
// Multi line comment version 2
// Nested comments aren't allowed!
// ============================================================================
// "{" ... "}" comment. Same structure as MLCOMMENT1: a non-greedy loop that
// ends at the first "}", calls newLine for line terminators and produces a
// skipped token.
MLCOMMENT2
:
"{"
(
options
{
greedy = false;
generateAmbigWarnings = false;
}
: '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| .
)*
"}"
{
_ttype := TT_SKIP;
}
;
// ============================================================================
// White space
// ============================================================================
// One unit of white space per match: a line terminator (CR LF, CR or LF,
// each calling newLine), a tab (calling tab) or a blank. The token is
// skipped.
WS
:
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| '\t' { tab; }
| ' '
)
{
_ttype := TT_SKIP;
}
;
+368
View File
@@ -0,0 +1,368 @@
// Parser grammar for the wp language: generates class TwpParser in unit
// wpParser, using the token vocabulary exported by the wpLexer grammar.
unit wpParser;
parser TwpParser;
options
{
importVocab = wpLexer;
k = 2;
}
// ============================================================================
// prog
// ============================================================================
// Program heading followed by the main block and the closing dot:
//   program name [ "(" id { "," id } ")" ] ";" block "."
// Program parameters form an identifier list, so they are separated by
// commas (e.g. "program demo(input, output);"), not by colons.
prog
: "program" id (LPAREN id (COMMA id)* RPAREN)? SEMI block DOT
;
// ============================================================================
// block
// ============================================================================
// A block is the declarations followed by one compound statement.
block
: declarations compoundStmt
;
// ============================================================================
// declarations
// ============================================================================
// Declaration sections in fixed order; every section is optional:
//   label section, const section, type section, var section, then any number
//   of procedure and function declarations, each with its own nested block.
declarations
:
( "label" UINT (COMMA UINT)* SEMI )?
( "const" (id EQ constant SEMI)+ )?
( "type" (id EQ typeSpec SEMI)+ )?
( "var" (id (COMMA id)* COLON typeSpec SEMI)+ )?
(
"procedure" id parameterList SEMI block SEMI
| "function" id parameterList COLON id SEMI block SEMI
)*
;
// ============================================================================
// statement
// ============================================================================
// A statement: an optional numeric label (UINT ':') followed by at most one
// of the statement kinds below; the whole body is optional, so the empty
// statement is accepted. The syntactic predicate (variable ASSIGN) selects
// assignmentStmt ahead of procedureCall.
statement
: (UINT COLON)?
(
(variable ASSIGN) => assignmentStmt
| procedureCall
| compoundStmt
| ifStmt
| caseStmt
| whileStmt
| repeatStmt
| forStmt
| withStmt
| gotoStmt
)?
;
// ============================================================================
// assignmentStmt
// ============================================================================
// variable ":=" expression
assignmentStmt
: variable ASSIGN expression
;
// ============================================================================
// procedureCall
// ============================================================================
// A name with an optional parenthesised, comma-separated argument list; each
// argument may carry a widthSpec. Rule factor also uses this rule for
// function calls.
procedureCall
: id (LPAREN expression (widthSpec)? (COMMA expression (widthSpec)? )* RPAREN)?
;
// ============================================================================
// widthSpec
// ============================================================================
// ":" UINT, optionally followed by a second ":" UINT.
// NOTE(review): presumably the field width / decimal places of write
// arguments - confirm.
widthSpec
: (COLON UINT) (COLON UINT)?
;
// ============================================================================
// compoundStmt
// ============================================================================
// "begin" ... "end" around a semicolon-separated statement list. Because
// statement may be empty, a semicolon directly before "end" is accepted.
compoundStmt
: "begin" (statement (SEMI statement)*)? "end"
;
// ============================================================================
// ifStmt
// ============================================================================
// The syntactic predicate attaches an "else" to the if currently being
// parsed, i.e. to the innermost one.
ifStmt
: "if" expression "then" statement
(
("else") => "else" statement
|
)
;
// ============================================================================
// caseStmt
// ============================================================================
// NOTE(review): every SEMI must be followed by another caseStmtItem, so a
// semicolon after the last item ("...; end") is rejected - confirm whether
// that is intended.
caseStmt
: "case" expression "of"
( caseStmtItem (SEMI caseStmtItem)* )?
"end"
;
// ============================================================================
// caseStmtItem
// ============================================================================
// One or more comma-separated constants, a colon and the selected statement.
caseStmtItem
: constant (COMMA constant)* COLON statement
;
// ============================================================================
// whileStmt
// ============================================================================
whileStmt
: "while" expression "do" statement
;
// ============================================================================
// repeatStmt
// ============================================================================
// The body is a statement list, so no begin/end is needed inside
// repeat ... until.
repeatStmt
: "repeat" (statement (SEMI statement)*)? "until" expression
;
// ============================================================================
// forStmt
// ============================================================================
// The control variable is a plain id, not a general variable.
forStmt
: "for" id ASSIGN expression ("to" | "downto") expression "do" statement
;
// ============================================================================
// withStmt
// ============================================================================
withStmt
: "with" variable (COMMA variable)* "do" statement
;
// ============================================================================
// gotoStmt
// ============================================================================
// The target is a numeric label.
gotoStmt
: "goto" UINT
;
// ============================================================================
// parameterList
// ============================================================================
// Optional parenthesised list of semicolon-separated parameter groups.
parameterList
: (
LPAREN
parameter (SEMI parameter)*
RPAREN
)?
;
// ============================================================================
// parameter
// ============================================================================
// A parameter group is either
//   [ "var" | "function" ] names ":" typeId
// or
//   "procedure" names          (no type follows)
parameter
: ("var" | "function")? id (COMMA id)* COLON typeId
| "procedure" id (COMMA id)*
;
// ============================================================================
// expression
// ============================================================================
// Expression levels, loosest-binding first: relational operators, then
// additive operators (with an optional leading sign), then multiplicative
// operators, then factors.
expression
: simpleExpression (relOp simpleExpression)*
;
// ============================================================================
// simpleExpression
// ============================================================================
// The optional sign is placed in front of the first term.
simpleExpression
: (PLUS|MINUS)? term (addOp term)*
;
// ============================================================================
// term
// ============================================================================
term
: factor (mulOp factor)*
;
// ============================================================================
// factor
// ============================================================================
// Operands: numbers, nil, character and string literals, function calls
// (the predicate (id LPAREN) selects procedureCall ahead of variable),
// variables, parenthesised expressions, "not" factor, and set constructors
// "[" ... "]" whose elements are expressions or ranges.
factor
: uNumber
| "nil"
| CHAR
| STRING
| (id LPAREN) => procedureCall
| variable
| LPAREN expression RPAREN
| "not" factor
| LBRACKET (expression (RANGE expression)? (COMMA expression (RANGE expression)? )* )? RBRACKET
;
// ============================================================================
// variable
// ============================================================================
// A variable reference: a name followed by any number of selectors -
// index lists in brackets, field selections (DOT fieldId) and pointer
// dereferences (PTR).
variable
: variableId
(
LBRACKET expression (COMMA expression)* RBRACKET
| DOT fieldId
| PTR
)*
;
// ============================================================================
// fieldList
// ============================================================================
// Contents of a record (or of one variant):
//   - fixed field groups separated by semicolons, optionally followed by
//     ";" and a variant part, with an optional trailing semicolon, or
//   - a variant part alone, with an optional trailing semicolon, or
//   - nothing.
// The field groups of a record are separated by SEMI ("a: integer; b: real");
// without the separator only a single group could be parsed.
fieldList
: simpleFieldList (SEMI simpleFieldList)* (SEMI variantFieldList)? (SEMI)?
| variantFieldList (SEMI)?
|
;
// ============================================================================
// simpleFieldList
// ============================================================================
// One group of fields sharing a type: names ":" typeSpec.
simpleFieldList
: id (COMMA id)* COLON typeSpec
;
// ============================================================================
// variantFieldList
// ============================================================================
// Variant part of a record: "case" [ tag ":" ] typeId "of", then one or more
// semicolon-separated variants. Each variant is a list of constants, a colon
// and a parenthesised fieldList.
variantFieldList
: "case" (id COLON)? typeId "of"
constant (COMMA constant)* COLON LPAREN fieldList RPAREN
(SEMI constant (COMMA constant)* COLON LPAREN fieldList RPAREN)*
;
// ============================================================================
// typeSpec
// ============================================================================
// A type: a simple type, a pointer type (PTR typeId), or a structured type
// (array, file, set, record) optionally preceded by "packed".
typeSpec
: simpleType
| PTR typeId
| ("packed")?
(
"array" LBRACKET simpleType (COMMA simpleType)* RBRACKET "of" typeSpec
| "file" "of" typeSpec
| "set" "of" simpleType
| "record" fieldList "end"
)
;
// ============================================================================
// simpleType
// ============================================================================
// A subrange (constant ".." constant; the predicate tells it apart from a
// type name that starts with the same identifier), a type name, or an
// enumeration "(" id { "," id } ")".
simpleType
: (constant RANGE) => constant RANGE constant
| typeId
| LPAREN id (COMMA id)* RPAREN
;
// ============================================================================
// constant
// ============================================================================
// An optionally signed constant name or number, or a character literal.
// NOTE(review): STRING is not accepted here, so a declaration such as
// const s = 'text'; does not parse - confirm whether that is intended.
constant
: (PLUS | MINUS)? (constantId | uNumber)
| CHAR
;
// ============================================================================
// uConstant
// ============================================================================
// An unsigned constant: constant name, number, nil or character literal.
// No other rule in this grammar references uConstant.
uConstant
: constantId
| uNumber
| "nil"
| CHAR
;
// ============================================================================
// uNumber
// ============================================================================
// An unsigned integer or real literal.
uNumber
: UINT
| UREAL
;
// ============================================================================
// uInt
// ============================================================================
// An unsigned integer literal. No other rule in this grammar references uInt.
uInt
: UINT;
// ============================================================================
// fieldId
// ============================================================================
// fieldId, variableId, typeId and constantId are all plain identifiers; the
// separate rule names only record the role the identifier plays where it is
// used.
fieldId
: id
;
// ============================================================================
// variableId
// ============================================================================
variableId
: id
;
// ============================================================================
// typeId
// ============================================================================
typeId
: id
;
// ============================================================================
// constantId
// ============================================================================
constantId
: id
;
// ============================================================================
// id
// ============================================================================
// The single place where the ID token is referenced.
id
: ID
;
// ============================================================================
// relOp
// ============================================================================
// Relational operators, used by rule expression.
relOp
: GT
| LT
| GE
| LE
| NE
| EQ
| "in"
;
// ============================================================================
// addOp
// ============================================================================
// Additive operators, used by rule simpleExpression.
addOp
: PLUS
| MINUS
| "or"
| "xor"
;
// ============================================================================
// mulOp
// ============================================================================
// Multiplicative operators, used by rule term.
mulOp
: STAR
| SLASH
| "div"
| "mod"
| "and"
;
+36
View File
@@ -0,0 +1,36 @@
// Command-line driver for the calculator demo: runs TcalcParser.calc over
// the file named by the single argument.
program calc;
{$APPTYPE CONSOLE}
uses
  Classes,
  SysUtils,
  calcLexer in 'calcLexer.pas',
  calcParser in 'calcParser.pas';
var
  stm: TFileStream;
  lex: TcalcLexer;
  par: TcalcParser;
begin
  if ParamCount <> 1 then
  begin
    writeln('usage: calc <filename>');
    exit;
  end;
  stm := nil;
  par := nil;
  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      lex := TcalcLexer.Create(stm);
      par := TcalcParser.Create(lex);
      par.calc;
    except
      // Report the failure instead of swallowing it: a missing file, a
      // syntax error or a division by zero used to end the program without
      // any output.
      on e: Exception do writeln('Error: ' + e.Message);
      else writeln('Error');
    end;
  finally
    // Free is nil-safe.
    // NOTE(review): lex is not freed here; presumably the parser owns it.
    stm.Free;
    par.Free;
  end;
end.
+5
View File
@@ -0,0 +1,5 @@
1+2+3+4+5+6+7+8+9;
(((((2+3)))));
(-1*(-2*(-3*(-4+ -5))));
(-1*(-2*(-3*(-4+ 5))));
7 * -(-9);
+49
View File
@@ -0,0 +1,49 @@
// ============================================================================
// Demo lexer for four operator calculator
// ============================================================================
// Lexer grammar of the calculator demo: generates class TcalcLexer in unit
// calcLexer. No k option is given here, so the generator's default lookahead
// applies.
unit calcLexer;
lexer TcalcLexer;
options
{
exportVocab = calcLexer;
}
// ============================================================================
// Simple tokens
// ============================================================================
// Single-character tokens: parentheses, the four operators and the
// statement terminator.
LPAREN : '(';
RPAREN : ')';
PLUS : '+';
MINUS : '-';
STAR : '*';
SLASH : '/';
SEMI : ';';
// ============================================================================
// INT
// ============================================================================
// One or more decimal digits; a sign is not part of the token.
INT : ('0'..'9')+;
// ============================================================================
// White space
// ============================================================================
// One unit of white space per match; the token is skipped.
// The alternatives '\r' '\n' and '\r' start with the same character, and the
// warning about that is switched off with generateAmbigWarnings = false.
// NOTE(review): with one character of lookahead the lone '\r' alternative
// looks unreachable (a single CR would then fail on the expected '\n') -
// confirm against the generated lexer.
WS
:
(
options
{
generateAmbigWarnings = false;
}
: '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| '\t' { tab; }
| ' '
)
{
_ttype := TT_SKIP;
}
;
+430
View File
@@ -0,0 +1,430 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0d
// Grammar: calcLexer
// ============================================================================
unit calcLexer;
interface
uses
calcLexerTokens,
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTypes,
SysUtils;
type
// =========================================================================
// Class TcalcLexer declaration
// =========================================================================
// Generated lexer class. Each m<RULE> method implements one lexer rule and
// leaves its result in fReturnToken; NextToken selects the rule from the
// next input character.
TcalcLexer = class( TdpgLexer)
protected // Public grammar rules ("rescoped")
// pCreate: when true (and the rule's token type is not TT_SKIP) a token
// object is created for the matched text.
procedure mLPAREN ( pCreate: boolean);
procedure mRPAREN ( pCreate: boolean);
procedure mPLUS ( pCreate: boolean);
procedure mMINUS ( pCreate: boolean);
procedure mSTAR ( pCreate: boolean);
procedure mSLASH ( pCreate: boolean);
procedure mSEMI ( pCreate: boolean);
procedure mINT ( pCreate: boolean);
procedure mWS ( pCreate: boolean);
public
// Returns the next non-skipped token, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mLPAREN
// ============================================================================
// Lexer rule LPAREN: matches one '(' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mLPAREN( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_LPAREN;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match('(');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mRPAREN
// ============================================================================
// Lexer rule RPAREN: matches one ')' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mRPAREN( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_RPAREN;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match(')');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mPLUS
// ============================================================================
// Lexer rule PLUS: matches one '+' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mPLUS( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_PLUS;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match('+');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mMINUS
// ============================================================================
// Lexer rule MINUS: matches one '-' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mMINUS( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_MINUS;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match('-');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mSTAR
// ============================================================================
// Lexer rule STAR: matches one '*' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mSTAR( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_STAR;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match('*');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mSLASH
// ============================================================================
// Lexer rule SLASH: matches one '/' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mSLASH( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_SLASH;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match('/');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mSEMI
// ============================================================================
// Lexer rule SEMI: matches one ';' and stores the resulting token in
// fReturnToken (nil when no token is to be created).
procedure TcalcLexer.mSEMI( pCreate: boolean);
var
  startPos: integer;
  tokType : integer;
  tok     : IdpgToken;
begin
  tok      := nil;
  tokType  := TT_SEMI;
  // First position behind the text that fText holds on entry.
  startPos := Length( fText) + 1;
  match(';');
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mINT
// ============================================================================
// Lexer rule INT: matches one or more decimal digits and stores the
// resulting token in fReturnToken (nil when no token is to be created).
// Raises EdpgMismatchedChar when the next character is not a digit.
procedure TcalcLexer.mINT( pCreate: boolean);
var
  startPos  : integer;
  digitCount: integer;
  tokType   : integer;
  tok       : IdpgToken;
begin
  tok        := nil;
  tokType    := TT_INT;
  // First position behind the text that fText holds on entry.
  startPos   := Length( fText) + 1;
  digitCount := 0;
  // Consume the whole run of digits.
  while LA(1) in ['0'..'9'] do
  begin
    match( ['0'..'9']);
    Inc( digitCount);
  end;
  // The (...)+ loop of the rule needs at least one iteration.
  if digitCount < 1 then
    Raise EdpgMismatchedChar.Create( LA(1), ['0'..'9'], FileName, Line, Column);
  if pCreate and (tokType <> TT_SKIP) then
  begin
    tok := makeToken( tokType);
    tok.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := tok;
end;
// ============================================================================
// mWS
// ============================================================================
// Lexer rule WS: matches one unit of white space - CR LF, a lone CR, LF,
// tab or blank - and marks the token as skipped, so fReturnToken is always
// nil on return.
//
// The grammar's first two alternatives ('\r' '\n' and '\r') both start with
// CR. They are handled together here: after the CR a following LF is
// consumed when present. The generated code tested LA(1) = #13 twice, which
// made the lone-CR branch unreachable and let a single CR fail on the
// mandatory match(#10).
// NOTE: this unit is generated; regenerating it from the grammar will
// overwrite this change unless the grammar or generator is adjusted too.
//
// Raises EdpgMismatchedChar when the next character is not white space.
procedure TcalcLexer.mWS( pCreate: boolean);
var
  _begin: integer;
  _token: IdpgToken;
  _ttype: integer;
begin
  _begin := Length( fText) +1;
  _token := nil;
  _ttype := TT_WS;
  if LA(1) = #13 then
  begin
    match(#13);
    // CR LF counts as one line break, just like a lone CR.
    if LA(1) = #10 then
      match(#10);
    newLine;
  end
  else if LA(1) = #10 then
  begin
    match(#10);
    newLine;
  end
  else if LA(1) = #9 then
  begin
    match(#9);
    tab;
  end
  else if LA(1) = ' ' then
  begin
    match(' ');
  end
  else
    Raise EdpgMismatchedChar.Create( LA(1), [#9..#10,#13,' '], FileName, Line, Column);
  _ttype := TT_SKIP;
  // Always false for this rule because of the assignment above; kept in the
  // same shape as the other generated rule methods.
  if (_ttype <> TT_SKIP) and (pCreate = true) then
  begin
    _token := makeToken( _ttype);
    _token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
  end;
  fReturnToken := _token;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Returns the next token of the input. Rules that yield no token (fReturnToken
// is nil, e.g. white space) are skipped by trying again. At end of input
// uponEof is called and a TT_EOF token is returned. Any other character that
// starts no rule raises EdpgMismatchedChar.
// ----------------------------------------------------------------------------
function TcalcLexer.NextToken : IdpgToken;
var
  _first : TdpgCharSet;
begin
  // All characters that can start a token of this lexer.
  _first := [#9..#10,#13,' ','('..'+','-','/'..'9',';'];
  repeat
    ResetText;
    if LA(1) in _first then
    begin
      // Dispatch on the first character of the token.
      case LA(1) of
        '('              : mLPAREN(true);
        ')'              : mRPAREN(true);
        '+'              : mPLUS(true);
        '-'              : mMINUS(true);
        '*'              : mSTAR(true);
        '/'              : mSLASH(true);
        ';'              : mSEMI(true);
        '0'..'9'         : mINT(true);
        #9, #10, #13, ' ': mWS(true);
      end;
      result := fReturnToken;
    end
    else if LA(1) = EOF_CHAR then
    begin
      uponEof;
      result := TdpgToken.Create(TT_EOF);
    end
    else
      Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column);
    // A nil result means a skipped token: go round again.
  until result <> nil;
end;
end.
+24
View File
@@ -0,0 +1,24 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0d
// Grammar: calcLexer
// ============================================================================
// Token type numbers of the calcLexer vocabulary.
unit calcLexerTokens;
interface
const
// The numbering has a gap: 2 and 3 are not assigned in this unit.
TT_EOF = 1;
TT_LPAREN = 4;
TT_RPAREN = 5;
TT_PLUS = 6;
TT_MINUS = 7;
TT_STAR = 8;
TT_SLASH = 9;
TT_SEMI = 10;
TT_INT = 11;
TT_WS = 12;
implementation
end.
+12
View File
@@ -0,0 +1,12 @@
// $Delphi Parser Generator: calcLexer -> calcLexerTokens.txt$
TcalcLexer
TT_EOF=1
TT_LPAREN=4
TT_RPAREN=5
TT_PLUS=6
TT_MINUS=7
TT_STAR=8
TT_SLASH=9
TT_SEMI=10
TT_INT=11
TT_WS=12
+93
View File
@@ -0,0 +1,93 @@
// ============================================================================
// Demo parser for four operator calculator
// ============================================================================
// Parser grammar of the calculator demo: generates class TcalcParser in unit
// calcParser, using the token vocabulary exported by the calcLexer grammar.
unit calcParser;
parser TcalcParser;
options
{
importVocab = calcLexer;
exportVocab = calcParser;
// The k option is disabled, so the generator's default lookahead applies.
// k = 2;
}
// ============================================================================
// calc
// ============================================================================
// Start rule: one or more expressions, each terminated by SEMI; the action
// prints v after each one.
// NOTE(review): "v=expression" presumably stores the rule's return value in
// v - confirm that the generator emits this assignment.
calc
local
{
v: integer;
}
: (v=expression SEMI {writeln(v);} )+
;
// ============================================================================
// expression
// ============================================================================
// An expression is a simpleExpression; the rule is declared to return an
// integer.
expression returns [integer]
: result=simpleExpression
;
// ============================================================================
// simpleExpression
// ============================================================================
// Sum / difference of terms; the loop applies each operator to the running
// result as it is parsed, i.e. from left to right.
simpleExpression returns [integer]
local
{
v : integer;
}
: result=term
(
PLUS v=term { result := result + v; }
| MINUS v=term { result := result - v; }
)*
;
// ============================================================================
// term
// ============================================================================
// Product / quotient of factors, applied from left to right. SLASH is integer
// division (div).
term returns [integer]
local
{
v : integer;
}
: result=factor
(
STAR v=factor { result := result * v; }
| SLASH v=factor { result := result div v; }
)*
;
// ============================================================================
// factor
// ============================================================================
// An optionally signed operand: an unsigned integer or a parenthesised
// expression. s holds the sign (1 by default, -1 after MINUS) and is
// multiplied into the result at the end.
factor returns [integer]
local
{
s: integer;
}
{
s := 1;
}
:
(
PLUS { s := 1; }
| MINUS { s := -1; }
)?
(
result=uInt
| LPAREN result=expression RPAREN
)
{
result := s * result;
}
;
// ============================================================================
// uInt
// ============================================================================
// An INT token (labelled x), converted to an integer with StrToInt.
uInt returns [integer]
: x:INT { result := StrToInt( x.TokenText); }
;
+203
View File
@@ -0,0 +1,203 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0d
// Grammar: calcParser
// ============================================================================
unit calcParser;
interface
uses
calcParserTokens,
Classes,
Contnrs,
dpgLLkParser,
dpgToken,
dpgTypes,
SysUtils;
type
// =========================================================================
// Class TcalcParser declaration
// =========================================================================
// Generated parser class with one method per grammar rule.
// NOTE(review): every rule is declared as a procedure, yet the method bodies
// in this unit assign to "result" - confirm whether the generator is meant
// to emit functions for rules that return a value.
TcalcParser = class( TdpgLLkParser)
public // Public grammar rules
procedure calc ;
procedure expression ;
procedure simpleExpression ;
procedure term ;
procedure factor ;
procedure uInt ;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedToken;
// ============================================================================
// calc
// ============================================================================
// Start rule: parses one or more "expression ;" sequences and writes v after
// each. Raises EdpgMismatchedToken when not even one expression is present.
// NOTE(review): v is never assigned in this method (expression is called as
// a procedure), so writeln(v) prints an undefined value.
procedure TcalcParser.calc;
var
_cnt_16: integer;
v: integer;
begin
// Counts completed iterations; at least one is required.
_cnt_16 := 0;
while(true) do
begin
if (( LA(1) in [TT_LPAREN,TT_PLUS..TT_MINUS,TT_INT])) then
begin
expression;
match(TT_SEMI);
writeln(v);
end
else
begin
if _cnt_16 >= 1 then
break
else
Raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_PLUS..TT_MINUS,TT_INT], FileName);
end;
INC(_cnt_16);
end;
end;
// ============================================================================
// expression
// ============================================================================
procedure TcalcParser.expression;
begin
simpleExpression;
end;
// ============================================================================
// simpleExpression
// ============================================================================
procedure TcalcParser.simpleExpression;
var
v : integer;
begin
term;
while(true) do
begin
if (( LA(1) in [TT_PLUS])) then
begin
match(TT_PLUS);
term;
result := result + v;
end
else if (( LA(1) in [TT_MINUS])) then
begin
match(TT_MINUS);
term;
result := result - v;
end
else
break;
end;
end;
// ============================================================================
// term
// ============================================================================
procedure TcalcParser.term;
var
v : integer;
begin
factor;
while(true) do
begin
if (( LA(1) in [TT_STAR])) then
begin
match(TT_STAR);
factor;
result := result * v;
end
else if (( LA(1) in [TT_SLASH])) then
begin
match(TT_SLASH);
factor;
result := result div v;
end
else
break;
end;
end;
// ============================================================================
// factor
// ============================================================================
procedure TcalcParser.factor;
var
s: integer;
begin
s := 1;
if (( LA(1) in [TT_PLUS])) then
begin
match(TT_PLUS);
s := 1;
end
else if (( LA(1) in [TT_MINUS])) then
begin
match(TT_MINUS);
s := -1;
end;
if (( LA(1) in [TT_INT])) then
begin
uInt;
end
else if (( LA(1) in [TT_LPAREN])) then
begin
match(TT_LPAREN);
expression;
match(TT_RPAREN);
end
else
Raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_INT], FileName);
result := s * result;
end;
// ============================================================================
// uInt
// ============================================================================
procedure TcalcParser.uInt;
var
x: IdpgToken;
begin
x := LT(1);
match(TT_INT);
result := StrToInt( x.TokenText);
end;
end.
+24
View File
@@ -0,0 +1,24 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0d
// Grammar: calcParser
// ============================================================================
// Token type codes for the calc grammar; unit calcParser lists this unit in
// its uses clause and tests these values in its lookahead sets.
unit calcParserTokens;
interface
const
TT_EOF = 1;
TT_LPAREN = 4;
TT_RPAREN = 5;
// TT_PLUS and TT_MINUS must stay adjacent: calcParser uses the subrange
// TT_PLUS..TT_MINUS in its lookahead sets.
TT_PLUS = 6;
TT_MINUS = 7;
TT_STAR = 8;
TT_SLASH = 9;
TT_SEMI = 10;
TT_INT = 11;
// The WS lexer rule sets its token type to TT_SKIP, so this code presumably
// never reaches the parser - TODO confirm.
TT_WS = 12;
implementation
end.
+12
View File
@@ -0,0 +1,12 @@
// $Delphi Parser Generator: calcParser -> calcParserTokens.txt$
TcalcParser
TT_EOF=1
TT_LPAREN=4
TT_RPAREN=5
TT_PLUS=6
TT_MINUS=7
TT_STAR=8
TT_SLASH=9
TT_SEMI=10
TT_INT=11
TT_WS=12
+11
View File
@@ -0,0 +1,11 @@
To build the demo project you must first compile the grammars.
Note: this grammar CANNOT be compiled with the demo version of dpg.
1. dpg calcLexer.g
2. dpg calcParser.g
After compilation the project can be opened in Delphi. Make sure that the dpg
runtime library is on the Delphi library path (in the project settings
or in the environment settings).
Have fun...
+91
View File
@@ -0,0 +1,91 @@
unit hocLexer;
// ============================================================================
// Lexer class declaration
// ============================================================================
lexer ThocLexer;
// ----------------------------------------------------------------------------
// Lexer options
// ----------------------------------------------------------------------------
// k: lookahead depth; presumably 2 because NEWLINE has to look at a second
//    character to tell "\r\n" from a lone "\r" - TODO confirm.
// exportVocab: name of the exported token vocabulary; hocParser.g imports it
//    with importVocab = hocLexer.
// caseSensitive: false; the generated lexer's initialize method sets its
//    case-sensitivity fields to false accordingly.
options
{
k = 2;
exportVocab = hocLexer;
caseSensitive = false;
}
// ============================================================================
// Begin rule definitions
//
// Remember: All lexer rule names must begin with UPPERCASE letter!
// ============================================================================
// ----------------------------------------------------------------------------
// Simple tokens
// ----------------------------------------------------------------------------
// One token per single character: the two parentheses and the four
// arithmetic operators used by the parser's expression/term/factor rules.
LPAREN : '(';
RPAREN : ')';
PLUS : '+';
MINUS : '-';
STAR : '*';
SLASH : '/';
// ----------------------------------------------------------------------------
// NUMBER
// ----------------------------------------------------------------------------
// Unsigned number: one or more digits, optionally followed by '.' and one or
// more digits (for example 12 or 3.14). No sign and no exponent.
UNUMBER
: UINT ('.' UINT)?
;
// ----------------------------------------------------------------------------
// UINT
// ----------------------------------------------------------------------------
// Helper rule (protected): one or more digits. Only referenced from UNUMBER;
// the generated NextToken never dispatches to it directly.
protected
UINT
: (DIGIT)+
;
// ----------------------------------------------------------------------------
// DIGIT
// ----------------------------------------------------------------------------
// Helper rule (protected): a single decimal digit. Only referenced from UINT.
protected
DIGIT
: '0'..'9'
;
// ----------------------------------------------------------------------------
// NEWLINE
// ----------------------------------------------------------------------------
// End of line in any of the three conventions: "\r\n", "\r" or "\n". Each
// alternative calls newLine. Unlike WHITESPACE this rule keeps its token
// type, because the parser's prog rule matches NEWLINE after every line.
// Ambiguity warnings are switched off for the subrule; presumably because
// the first two alternatives both start with '\r' - TODO confirm.
NEWLINE
:
(
options
{
generateAmbigWarnings = false;
}
: '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
)
;
// ----------------------------------------------------------------------------
// WHITESPACE
// ----------------------------------------------------------------------------
// A single blank or tab. A tab additionally calls tab. The token type is
// replaced by TT_SKIP, so no token is created for this rule and the generated
// NextToken loops on to the next token.
WHITESPACE
:
(
' '
| '\t' { tab; }
)
{
_ttype := TT_SKIP;
}
;
// ============================================================================
// End rule definitions
// ============================================================================
+505
View File
@@ -0,0 +1,505 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0r
// Grammar: hocLexer
// ============================================================================
unit hocLexer;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTypes,
hocLexerTokens,
SysUtils;
type
// =========================================================================
// Class ThocLexer declaration
// =========================================================================
// Lexer generated from hocLexer.g. There is one m<RULE> method per grammar
// rule. Each takes pCreate (true = build a token object for the matched text)
// and leaves its outcome in fReturnToken (nil when no token was built).
// NextToken is the public entry point and dispatches to the non-protected
// rules.
ThocLexer = class( TdpgLexer)
protected // Internals
// Applies the grammar's caseSensitive = false option.
procedure initialize; override;
protected // Protected grammar rules
// Helper rules, only called from other rules with pCreate = false.
procedure mUINT ( pCreate: boolean);
procedure mDIGIT ( pCreate: boolean);
protected // Public grammar rules ("rescoped")
procedure mLPAREN ( pCreate: boolean);
procedure mRPAREN ( pCreate: boolean);
procedure mPLUS ( pCreate: boolean);
procedure mMINUS ( pCreate: boolean);
procedure mSTAR ( pCreate: boolean);
procedure mSLASH ( pCreate: boolean);
procedure mUNUMBER ( pCreate: boolean);
procedure mNEWLINE ( pCreate: boolean);
procedure mWHITESPACE ( pCreate: boolean);
public
// Returns the next token that is not skipped, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mLPAREN
// ============================================================================
// Matches '(' and hands a TT_LPAREN token to fReturnToken when pCreate is
// true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mLPAREN( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_LPAREN;

  match('(');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    // Token text is everything fText gained since startPos.
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mRPAREN
// ============================================================================
// Matches ')' and hands a TT_RPAREN token to fReturnToken when pCreate is
// true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mRPAREN( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_RPAREN;

  match(')');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mPLUS
// ============================================================================
// Matches '+' and hands a TT_PLUS token to fReturnToken when pCreate is true;
// otherwise fReturnToken is set to nil.
procedure ThocLexer.mPLUS( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_PLUS;

  match('+');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mMINUS
// ============================================================================
// Matches '-' and hands a TT_MINUS token to fReturnToken when pCreate is
// true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mMINUS( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_MINUS;

  match('-');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mSTAR
// ============================================================================
// Matches '*' and hands a TT_STAR token to fReturnToken when pCreate is true;
// otherwise fReturnToken is set to nil.
procedure ThocLexer.mSTAR( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_STAR;

  match('*');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mSLASH
// ============================================================================
// Matches '/' and hands a TT_SLASH token to fReturnToken when pCreate is
// true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mSLASH( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_SLASH;

  match('/');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mUNUMBER
// ============================================================================
// Matches an unsigned number: digits, optionally followed by '.' and more
// digits. Hands a TT_UNUMBER token covering the whole lexeme to fReturnToken
// when pCreate is true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mUNUMBER( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_UNUMBER;

  // Integer part; sub-rules are called without creating tokens of their own.
  mUINT(false);

  // Optional fractional part.
  if LA(1) in ['.'] then
  begin
    match('.');
    mUINT(false);
  end;

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mUINT
// ============================================================================
// Matches one or more decimal digits. Raises EdpgMismatchedChar when the
// input does not start with a digit. Hands a TT_UINT token to fReturnToken
// when pCreate is true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mUINT( pCreate: boolean);
var
  startPos   : integer;    // index in fText of the first character of this lexeme
  digitCount : integer;    // digits matched so far
  tokenKind  : integer;
  newToken   : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_UINT;

  digitCount := 0;
  while LA(1) in ['0'..'9'] do
  begin
    mDIGIT(false);
    Inc( digitCount);
  end;

  // The (DIGIT)+ closure requires at least one digit.
  if digitCount < 1 then
    raise EdpgMismatchedChar.Create( LA(1), ['0'..'9'], FileName, Line, Column);

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mDIGIT
// ============================================================================
// Matches a single decimal digit and hands a TT_DIGIT token to fReturnToken
// when pCreate is true; otherwise fReturnToken is set to nil.
procedure ThocLexer.mDIGIT( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_DIGIT;

  match( ['0'..'9']);

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mNEWLINE
// ============================================================================
// Matches one line terminator (CR LF, lone CR or lone LF), calls newLine once
// and, when pCreate is true, returns a TT_NEWLINE token through fReturnToken.
// Raises EdpgMismatchedChar when the next character is neither CR nor LF.
procedure ThocLexer.mNEWLINE( pCreate: boolean);
var
_begin: integer;
_save: integer;
_token: IdpgToken;
_ttype: integer;
begin
// Remember where this lexeme starts inside fText.
_begin := Length( fText) +1;
_token := nil;
_ttype := TT_NEWLINE;
// The two-character CR LF case is tested first so that it is consumed as a
// single line break.
if (( LA(1) in [#13]) and (LA(2) in [#10])) then
begin
match(#13);
match(#10);
newLine;
end
else if (( LA(1) in [#13])) then
begin
match(#13);
newLine;
end
else if (( LA(1) in [#10])) then
begin
match(#10);
newLine;
end
else
Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
if (_ttype <> TT_SKIP) and (pCreate = true) then
begin
_token := makeToken( _ttype);
// Token text is everything fText gained since _begin.
_token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
end;
fReturnToken := _token;
end;
// ============================================================================
// mWHITESPACE
// ============================================================================
// Matches one blank or one tab (a tab also calls tab). The token type is then
// forced to TT_SKIP, so fReturnToken always ends up nil and NextToken moves
// on. Raises EdpgMismatchedChar for any other character.
procedure ThocLexer.mWHITESPACE( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_WHITESPACE;

  case LA(1) of
    ' ':
      match(' ');
    #9:
      begin
        match(#9);
        tab;
      end;
  else
    raise EdpgMismatchedChar.Create( LA(1), [#9,' '], FileName, Line, Column);
  end;

  // Grammar action: white space never becomes a token.
  tokenKind := TT_SKIP;

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
// Returns the next token of the input.
// The first lookahead character selects the rule method to run. A rule that
// leaves fReturnToken = nil (WHITESPACE, whose type becomes TT_SKIP) makes
// the loop try again. At EOF_CHAR uponEof is called and a TT_EOF token is
// returned. A character that starts no rule raises EdpgMismatchedChar.
function ThocLexer.NextToken : IdpgToken;
var
_first : TdpgCharSet;
begin
// All characters that can start a token; only used for the error report below.
_first := [#9..#10,#13,' ','('..'+','-','/'..'9'];
while( true) do
begin
ResetText;
try
if (( LA(1) in ['('])) then
begin
mLPAREN(true);
result := fReturnToken;
end
else if (( LA(1) in [')'])) then
begin
mRPAREN(true);
result := fReturnToken;
end
else if (( LA(1) in ['+'])) then
begin
mPLUS(true);
result := fReturnToken;
end
else if (( LA(1) in ['-'])) then
begin
mMINUS(true);
result := fReturnToken;
end
else if (( LA(1) in ['*'])) then
begin
mSTAR(true);
result := fReturnToken;
end
else if (( LA(1) in ['/'])) then
begin
mSLASH(true);
result := fReturnToken;
end
else if (( LA(1) in ['0'..'9'])) then
begin
mUNUMBER(true);
result := fReturnToken;
end
else if (( LA(1) in [#10,#13])) then
begin
mNEWLINE(true);
result := fReturnToken;
end
else if (( LA(1) in [#9,' '])) then
begin
mWHITESPACE(true);
result := fReturnToken;
end
else
begin
if LA(1) = EOF_CHAR then
begin
uponEof;
result := TdpgToken.Create(TT_EOF);
end
else
Raise EdpgMismatchedChar.Create(LA(1), _first, FileName, Line, Column);
end;
// --------------------------------------------------------------
// If we found a SKIP token, then try again...
// --------------------------------------------------------------
if result = nil then
continue;
// --------------------------------------------------------------
// Now we have a valid token, so exit the function
// --------------------------------------------------------------
break;
except
// Exceptions are passed on unchanged; this handler adds no behaviour.
Raise;
end;
end;
end;
// ----------------------------------------------------------------------------
// InitLiterals
// ----------------------------------------------------------------------------
// Applies the grammar option caseSensitive = false to the lexer itself and to
// its literals table.
procedure ThocLexer.initialize;
begin
fCaseSensitive := false;
fLiterals.CaseSensitive := false;
end;
end.
+26
View File
@@ -0,0 +1,26 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0r
// Grammar: hocLexer
// ============================================================================
// Token type codes exported by the hoc lexer grammar; unit hocLexer lists
// this unit in its uses clause.
unit hocLexerTokens;
interface
const
TT_EOF = 1;
TT_LPAREN = 4;
TT_RPAREN = 5;
TT_PLUS = 6;
TT_MINUS = 7;
TT_STAR = 8;
TT_SLASH = 9;
TT_UNUMBER = 10;
// UINT and DIGIT are protected helper rules; ThocLexer.NextToken never
// dispatches to them directly.
TT_UINT = 11;
TT_DIGIT = 12;
TT_NEWLINE = 13;
// The WHITESPACE rule replaces its type with TT_SKIP, so no token carries
// this code.
TT_WHITESPACE = 14;
implementation
end.
+14
View File
@@ -0,0 +1,14 @@
// $Delphi Parser Generator: hocLexer -> hocLexerTokens.txt$
ThocLexer
TT_EOF=1
TT_LPAREN=4
TT_RPAREN=5
TT_PLUS=6
TT_MINUS=7
TT_STAR=8
TT_SLASH=9
TT_UNUMBER=10
TT_UINT=11
TT_DIGIT=12
TT_NEWLINE=13
TT_WHITESPACE=14
+101
View File
@@ -0,0 +1,101 @@
unit hocParser;
// ============================================================================
// Parser class declaration
// ============================================================================
parser ThocParser;
// ----------------------------------------------------------------------------
// Parser options
// ----------------------------------------------------------------------------
// k: lookahead depth - presumably in tokens for a parser; TODO confirm.
// importVocab: token vocabulary taken from the lexer grammar (hocLexer).
// exportVocab: name under which this parser's vocabulary is written out
//    (hocParserTokens).
options
{
k = 2;
importVocab = hocLexer;
exportVocab = hocParser;
}
// ============================================================================
// Begin rule definitions
//
// Remember: All parser rule names must begin with LOWERCASE letter!
// ============================================================================
// ----------------------------------------------------------------------------
// prog
// ----------------------------------------------------------------------------
// prog: zero or more lines, each an optional expression followed by NEWLINE.
// The value of each expression is stored in the local "val".
// NOTE(review): val is never printed or otherwise used, so the demo produces
// no output for the expressions it evaluates - confirm whether this is
// intended.
prog
local
{
val : double;
}
:
(
(
val = expression
)?
NEWLINE
)*
;
// ----------------------------------------------------------------------------
// expression
// ----------------------------------------------------------------------------
// expression: term ((PLUS | MINUS) term)*; the terms are added or subtracted
// from left to right and the running total is returned.
expression returns [double]
local
{
val : double;
}
: result = term
(
PLUS val = term { result := result + val; }
| MINUS val = term { result := result - val; }
)*
;
// ----------------------------------------------------------------------------
// term
// ----------------------------------------------------------------------------
// term: factor ((STAR | SLASH) factor)*; the factors are multiplied or
// divided from left to right. SLASH uses the floating-point "/" operator.
term returns [double]
local
{
val : double;
}
: result = factor
(
STAR val = factor { result := result * val; }
| SLASH val = factor { result := result / val; }
)*
;
// ----------------------------------------------------------------------------
// factor
// ----------------------------------------------------------------------------
// factor: an unsigned number or a parenthesised expression. There is no
// alternative for a leading sign, so input such as "-3" is rejected here.
factor returns [double]
: result = uNumber
| LPAREN result = expression RPAREN
;
// ----------------------------------------------------------------------------
// uNumber
// ----------------------------------------------------------------------------
// uNumber: converts the text of one UNUMBER token to a double.
// The previous action called StrToIntDef with a single argument. That does
// not compile (StrToIntDef needs a default value) and is an integer
// conversion, while UNUMBER may contain a fractional part. Val is used
// instead: it parses floating-point text with '.' as the decimal separator,
// which is what the lexer produces, independent of the system locale.
// errPos is non-zero when the text could not be converted.
uNumber returns [double]
local
{
errPos : integer;
}
: n:UNUMBER
{
Val( n.TokenText, result, errPos);
if errPos <> 0 then
raise EConvertError.CreateFmt('Invalid number: %s', [n.TokenText]);
}
;
// ============================================================================
// End rule definitions
// ============================================================================
// ----------------------------------------------------------------------------
// This section is used for generating member definitions in the unit 'hocParser'.
// The content of the section is verbatim copied into the generated code.
// ----------------------------------------------------------------------------
memberdef
{
}
+170
View File
@@ -0,0 +1,170 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0r
// Grammar: hocParser
// ============================================================================
unit hocParser;
interface
uses
Classes,
Contnrs,
dpgLLkParser,
dpgToken,
dpgTypes,
hocParserTokens,
SysUtils;
type
// =========================================================================
// Class ThocParser declaration
// =========================================================================
// Parser generated from hocParser.g. There is one method per grammar rule;
// the value-returning rules yield the double value of the text they matched.
ThocParser = class( TdpgLLkParser)
public // Public grammar rules
// Entry rule: zero or more lines, each an optional expression plus NEWLINE.
procedure prog ;
function expression : double;
function term : double;
function factor : double;
function uNumber : double;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedToken;
// ============================================================================
// prog
// ============================================================================
// Parses zero or more lines. Each line is an optional expression followed by
// a NEWLINE token. The expression value is stored in lineValue and not used
// any further.
procedure ThocParser.prog;
var
  lineValue : double;
begin
  while LA(1) in [TT_LPAREN,TT_UNUMBER,TT_NEWLINE] do
  begin
    // An empty line consists of the NEWLINE alone.
    if LA(1) in [TT_LPAREN,TT_UNUMBER] then
      lineValue := expression;
    match(TT_NEWLINE);
  end;
end;
// ============================================================================
// expression
// ============================================================================
// Evaluates term ((PLUS | MINUS) term)* from left to right and returns the
// running total.
function ThocParser.expression: double;
var
  rhs : double;   // value of the term to the right of the operator
begin
  result := term;
  while true do
    case LA(1) of
      TT_PLUS:
        begin
          match(TT_PLUS);
          rhs := term;
          result := result + rhs;
        end;
      TT_MINUS:
        begin
          match(TT_MINUS);
          rhs := term;
          result := result - rhs;
        end;
    else
      // Any other token ends the expression.
      Break;
    end;
end;
// ============================================================================
// term
// ============================================================================
// Evaluates factor ((STAR | SLASH) factor)* from left to right and returns
// the running product. SLASH is floating-point division.
function ThocParser.term: double;
var
  rhs : double;   // value of the factor to the right of the operator
begin
  result := factor;
  while true do
    case LA(1) of
      TT_STAR:
        begin
          match(TT_STAR);
          rhs := factor;
          result := result * rhs;
        end;
      TT_SLASH:
        begin
          match(TT_SLASH);
          rhs := factor;
          result := result / rhs;
        end;
    else
      // Any other token ends the term.
      Break;
    end;
end;
// ============================================================================
// factor
// ============================================================================
// Returns the value of an unsigned number or of a parenthesised expression.
// Raises EdpgMismatchedToken when the next token starts neither.
function ThocParser.factor: double;
begin
  case LA(1) of
    TT_UNUMBER:
      result := uNumber;
    TT_LPAREN:
      begin
        match(TT_LPAREN);
        result := expression;
        match(TT_RPAREN);
      end;
  else
    raise EdpgMismatchedToken.Create( LT(1), [TT_LPAREN,TT_UNUMBER], FileName);
  end;
end;
// ============================================================================
// uNumber
// ============================================================================
// Matches one UNUMBER token and returns its text converted to a double.
// The generated body called StrToIntDef with a single argument. That does
// not compile (StrToIntDef needs a default value) and is an integer
// conversion, while UNUMBER may contain a fractional part. Val is used
// instead: it parses floating-point text with '.' as the decimal separator,
// which is what the lexer produces, independent of the system locale.
// Raises EConvertError when the token text is not a valid number.
function ThocParser.uNumber: double;
var
  n: IdpgToken;
  errPos: integer;   // 0 on success, otherwise index of the offending character
begin
  // Keep the token before match() moves past it.
  n := LT(1);
  match(TT_UNUMBER);
  Val( n.TokenText, result, errPos);
  if errPos <> 0 then
    raise EConvertError.CreateFmt('Invalid number: %s', [n.TokenText]);
end;
end.
@@ -0,0 +1,26 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.1.0r
// Grammar: hocParser
// ============================================================================
// Token type codes used by the hoc parser; unit hocParser lists this unit in
// its uses clause. The values are the same as in hocLexerTokens, whose
// vocabulary the parser grammar imports.
unit hocParserTokens;
interface
const
TT_EOF = 1;
TT_LPAREN = 4;
TT_RPAREN = 5;
TT_PLUS = 6;
TT_MINUS = 7;
TT_STAR = 8;
TT_SLASH = 9;
TT_UNUMBER = 10;
// The following two codes are not referenced by any parser rule in hocParser.
TT_UINT = 11;
TT_DIGIT = 12;
TT_NEWLINE = 13;
// Not referenced by any parser rule in hocParser.
TT_WHITESPACE = 14;
implementation
end.
@@ -0,0 +1,14 @@
// $Delphi Parser Generator: hocParser -> hocParserTokens.txt$
ThocParser
TT_EOF=1
TT_LPAREN=4
TT_RPAREN=5
TT_PLUS=6
TT_MINUS=7
TT_STAR=8
TT_SLASH=9
TT_UNUMBER=10
TT_UINT=11
TT_DIGIT=12
TT_NEWLINE=13
TT_WHITESPACE=14
+43
View File
@@ -0,0 +1,43 @@
unit filter;
lexer Tfilter;
// k: lookahead depth; the generated NextToken inspects LA(2) to tell "<p>"
//    from "<br>".
// filter: true; the generated NextToken consumes and ignores any character
//    that starts no rule instead of raising an error.
options
{
k = 2;
filter = true;
}
// ----------------------------------------------------------------------------
// Paragraph
// ----------------------------------------------------------------------------
// Matches the literal text "<p>".
P
: "<p>"
;
// ----------------------------------------------------------------------------
// Break
// ----------------------------------------------------------------------------
// Matches the literal text "<br>".
BR
: "<br>"
;
// ----------------------------------------------------------------------------
// Newline
// ----------------------------------------------------------------------------
// Line terminator in any of the three conventions: "\r\n", "\r" or "\n".
// Every alternative calls newLine and sets the token type to TT_SKIP, so line
// breaks are counted but never returned as tokens.
NEWLINE
:
(
'\r' '\n' { newLine; _ttype := TT_SKIP; }
| '\r' { newLine; _ttype := TT_SKIP; }
| '\n' { newLine; _ttype := TT_SKIP; }
)
;
// ----------------------------------------------------------------------------
// Tab
// ----------------------------------------------------------------------------
// A tab character: calls tab and sets the token type to TT_SKIP, so no token
// is returned for it.
TAB
: '\t' { tab; _ttype := TT_SKIP; }
;
+234
View File
@@ -0,0 +1,234 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.78r
// Grammar: filter.g
// ============================================================================
unit filter;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTypes,
filterTokens,
SysUtils;
type
// =========================================================================
// Class Tfilter declaration
// =========================================================================
// Filtering lexer generated from filter.g. It recognises "<p>" and "<br>",
// consumes line breaks and tabs without returning tokens for them, and
// silently consumes every other character. Each m<RULE> method takes pCreate
// (true = build a token object) and leaves its outcome in fReturnToken.
Tfilter = class( TdpgLexer)
protected // Public grammar rules ("rescoped")
procedure mP ( pCreate: boolean);
procedure mBR ( pCreate: boolean);
procedure mNEWLINE ( pCreate: boolean);
procedure mTAB ( pCreate: boolean);
public
// Returns the next TT_P or TT_BR token, or a TT_EOF token at end of input.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mP
// ============================================================================
// Matches the text '<p>' and hands a TT_P token to fReturnToken when pCreate
// is true; otherwise fReturnToken is set to nil.
procedure Tfilter.mP( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_P;

  match('<p>');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    // Token text is everything fText gained since startPos.
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mBR
// ============================================================================
// Matches the text '<br>' and hands a TT_BR token to fReturnToken when
// pCreate is true; otherwise fReturnToken is set to nil.
procedure Tfilter.mBR( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_BR;

  match('<br>');

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mNEWLINE
// ============================================================================
// Matches one line terminator (CR LF, lone CR or lone LF) and calls newLine
// once. Every branch sets the token type to TT_SKIP, so fReturnToken always
// ends up nil. Raises EdpgMismatchedChar when the next character is neither
// CR nor LF.
procedure Tfilter.mNEWLINE( pCreate: boolean);
var
_begin: integer;
_save: integer;
_token: IdpgToken;
_ttype: integer;
begin
// Remember where this lexeme starts inside fText.
_begin := Length( fText) +1;
_token := nil;
_ttype := TT_NEWLINE;
// The two-character CR LF case is tested first so that it is consumed as a
// single line break.
if (( LA(1) in [#13]) and (LA(2) in [#10])) then
begin
match(#13);
match(#10);
newLine; _ttype := TT_SKIP;
end
else if (( LA(1) in [#13])) then
begin
match(#13);
newLine; _ttype := TT_SKIP;
end
else if (( LA(1) in [#10])) then
begin
match(#10);
newLine; _ttype := TT_SKIP;
end
else
Raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
// Never true here, because every successful branch above set TT_SKIP.
if (_ttype <> TT_SKIP) and (pCreate = true) then
begin
_token := makeToken( _ttype);
_token.TokenText := Copy( fText, _begin, Length(fText)-_begin+1);
end;
fReturnToken := _token;
end;
// ============================================================================
// mTAB
// ============================================================================
// Matches one tab character and calls tab. The token type is then forced to
// TT_SKIP, so fReturnToken always ends up nil.
procedure Tfilter.mTAB( pCreate: boolean);
var
  startPos  : integer;    // index in fText of the first character of this lexeme
  tokenKind : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_TAB;

  match(#9);

  // Grammar action: tabs never become tokens.
  tab;
  tokenKind := TT_SKIP;

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
// Returns the next token in filter mode.
// "<p>" and "<br>" are selected by two characters of lookahead. Line breaks
// and tabs run their rules but produce no token (fReturnToken = nil), so the
// loop continues. At EOF_CHAR uponEof is called and a TT_EOF token is
// returned. Any other character is consumed and ignored.
function Tfilter.NextToken : IdpgToken;
begin
while( true) do
begin
ResetText;
try
if (( LA(1) in ['<']) and (LA(2) in ['p'])) then
begin
mP(true);
result := fReturnToken;
end
else if (( LA(1) in ['<']) and (LA(2) in ['b'])) then
begin
mBR(true);
result := fReturnToken;
end
else if (( LA(1) in [#10,#13])) then
begin
mNEWLINE(true);
result := fReturnToken;
end
else if (( LA(1) in [#9])) then
begin
mTAB(true);
result := fReturnToken;
end
else
begin
if LA(1) = EOF_CHAR then
begin
uponEof;
result := TdpgToken.Create(TT_EOF);
end
else
begin
// Filter mode: a character that starts no rule is dropped.
consume;
continue;
end;
end;
// --------------------------------------------------------------
// If we found a SKIP token, then try again...
// --------------------------------------------------------------
if result = nil then
continue;
// --------------------------------------------------------------
// Now we have a valid token, so exit the function
// --------------------------------------------------------------
break;
except
// A rule that fails part-way (for example "<pre" when "<p>" was expected) is
// abandoned: one character is consumed and scanning resumes.
// NOTE(review): this bare except swallows every exception type, not only
// lexer mismatches - confirm that this is intended.
consume;
continue;
end;
end;
end;
end.
+19
View File
@@ -0,0 +1,19 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.78r
// Grammar: filter.g
// ============================================================================
// Token type codes for the filter lexer; unit filter lists this unit in its
// uses clause.
unit filterTokens;
interface
const
TT_EOF = 1;
TT_P = 4;
TT_BR = 5;
// The NEWLINE and TAB rules replace their type with TT_SKIP, so no token
// carries the following two codes.
TT_NEWLINE = 6;
TT_TAB = 7;
implementation
end.
+7
View File
@@ -0,0 +1,7 @@
// $Delphi Parser Generator: filter.pas -> TfilterTokens.txt$
Tfilter
TT_EOF=1
TT_P=4
TT_BR=5
TT_NEWLINE=6
TT_TAB=7
@@ -0,0 +1,367 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javadoclexer.g
// ============================================================================
unit JavaDocLexer;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTokenStreamSelector,
dpgTypes,
JavaDocTokens,
SysUtils;
type
// =========================================================================
// Class TJavaDocLexer declaration
// =========================================================================
// Lexer generated from javadoclexer.g. Each m<RULE> method takes pCreate
// (true = build a token object for the matched text) and leaves its outcome
// in fReturnToken.
TJavaDocLexer = class( TdpgLexer)
public
// Token stream selector; presumably used to switch between this lexer and
// another one - TODO confirm against the code that assigns it.
Selector : IdpgTokenStreamSelector;
protected // Protected grammar rules
// Helper rule: one or more lowercase letters.
procedure mID ( pCreate: boolean);
protected // Public grammar rules ("rescoped")
procedure mPARAM ( pCreate: boolean);
procedure mEXCEPTION ( pCreate: boolean);
procedure mSTAR ( pCreate: boolean);
procedure mJAVADOC_CLOSE ( pCreate: boolean);
procedure mNEWLINE ( pCreate: boolean);
public
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mPARAM
// ============================================================================
// Matches '@param', one or more blanks and an identifier. Hands a TT_PARAM
// token covering the whole lexeme to fReturnToken when pCreate is true;
// otherwise fReturnToken is set to nil. Raises EdpgMismatchedChar when no
// blank follows the keyword.
procedure TJavaDocLexer.mPARAM( pCreate: boolean);
var
  startPos   : integer;    // index in fText of the first character of this lexeme
  blankCount : integer;    // blanks matched after the keyword
  tokenKind  : integer;
  newToken   : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenKind := TT_PARAM;

  match('@param');

  blankCount := 0;
  while LA(1) in [' '] do
  begin
    match(' ');
    Inc( blankCount);
  end;

  // At least one separating blank is required.
  if blankCount < 1 then
    raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);

  // The identifier is matched without creating a token of its own.
  mID(false);

  if pCreate and (tokenKind <> TT_SKIP) then
  begin
    newToken := makeToken( tokenKind);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end
  else
    newToken := nil;

  fReturnToken := newToken;
end;
// ============================================================================
// mID
// ============================================================================
procedure TJavaDocLexer.mID( pCreate: boolean);
// Rule ID : ('a'..'z')+
// Leaves the recognised token in fReturnToken (nil when pCreate is false).
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_ID;
  newToken  := nil;

  // At least one lower-case letter is required; the rest are taken greedily.
  if not (LA(1) in ['a'..'z']) then
    raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
  repeat
    match( ['a'..'z']);
  until not (LA(1) in ['a'..'z']);

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mEXCEPTION
// ============================================================================
procedure TJavaDocLexer.mEXCEPTION( pCreate: boolean);
// Rule EXCEPTION : "@exception" (' ')+ ID
// Leaves the recognised token in fReturnToken; the token is only built when
// pCreate is set, otherwise fReturnToken ends up nil.
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_EXCEPTION;
  newToken  := nil;

  match('@exception');

  // (' ')+ : the first blank is mandatory, any further blanks are absorbed.
  if not (LA(1) in [' ']) then
    raise EdpgMismatchedChar.Create( LA(1), [' '], FileName, Line, Column);
  repeat
    match(' ');
  until not (LA(1) in [' ']);

  // The exception name becomes part of this token's text, not a token of its own.
  mID(false);

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mSTAR
// ============================================================================
procedure TJavaDocLexer.mSTAR( pCreate: boolean);
// Rule STAR : '*' { _ttype := TT_SKIP; }
// The rule action always turns the token type into TT_SKIP, so no token is
// ever built here regardless of pCreate; fReturnToken is always left nil.
begin
  match('*');
  fReturnToken := nil;
end;
// ============================================================================
// mJAVADOC_CLOSE
// ============================================================================
procedure TJavaDocLexer.mJAVADOC_CLOSE( pCreate: boolean);
// Rule JAVADOC_CLOSE : "*/" { Selector.Pop; }
// Leaves the recognised token in fReturnToken (nil when pCreate is false).
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_JAVADOC_CLOSE;
  newToken  := nil;

  match('*/');

  // Rule action: pop the selector stack (presumably re-activating the lexer
  // that was current before the matching Push - confirm against the selector).
  Selector.Pop;

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mNEWLINE
// ============================================================================
procedure TJavaDocLexer.mNEWLINE( pCreate: boolean);
// Rule NEWLINE : ( '\r' '\n' | '\r' | '\n' ) { newLine; _ttype := TT_SKIP; }
// The rule action always turns the token type into TT_SKIP, so no token is
// ever built here regardless of pCreate; fReturnToken is always left nil.
var
  isCrLf : boolean;
begin
  if LA(1) in [#13] then
  begin
    // Look at the second character before consuming the CR.
    isCrLf := LA(2) in [#10];
    match(#13);
    if isCrLf then
      match(#10);
  end
  else if LA(1) in [#10] then
    match(#10)
  else
    raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);

  newLine;
  fReturnToken := nil;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
function TJavaDocLexer.NextToken : IdpgToken;
// Returns the next token of the Javadoc comment body.
//
// The lexer runs in filter mode: a character that starts none of the rules is
// consumed and ignored, and a rule that fails half-way (for example "@pxyz"
// entering PARAM) causes one character to be dropped before scanning resumes.
// Rules whose type is TT_SKIP leave fReturnToken nil and are retried as well.
// At end of input uponEof is called and a TT_EOF token is returned.
//
// Only DPG recognition errors (EdpgException and descendants) are filtered.
// Anything else - an access violation caused by an unassigned Selector, a
// stream read error, out of memory - propagates to the caller instead of
// being swallowed and retried.
begin
  while true do
  begin
    ResetText;
    try
      if (LA(1) in ['@']) and (LA(2) in ['p']) then
      begin
        mPARAM(true);
        result := fReturnToken;
      end
      else if (LA(1) in ['@']) and (LA(2) in ['e']) then
      begin
        mEXCEPTION(true);
        result := fReturnToken;
      end
      else if (LA(1) in ['*']) and (LA(2) in ['/']) then
      begin
        mJAVADOC_CLOSE(true);
        result := fReturnToken;
      end
      else if LA(1) in ['*'] then
      begin
        mSTAR(true);
        result := fReturnToken;
      end
      else if LA(1) in [#10,#13] then
      begin
        mNEWLINE(true);
        result := fReturnToken;
      end
      else if LA(1) = EOF_CHAR then
      begin
        uponEof;
        result := TdpgToken.Create(TT_EOF);
      end
      else
      begin
        // Filter mode: not the start of any rule, drop the character.
        consume;
        continue;
      end;

      // A nil result means the rule produced a SKIP token: scan again.
      if result = nil then
        continue;

      // A real token was produced.
      break;
    except
      on EdpgException do
      begin
        // Filter mode: a rule failed to match, drop one character and retry.
        consume;
        continue;
      end;
    end;
  end;
end;
end.
@@ -0,0 +1,21 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javadoclexer.g
// ============================================================================
// Token type ids used by TJavaDocLexer (generated; regenerate rather than edit).
// TT_SKIP, which the lexer also refers to, is not declared in this unit.
unit JavaDocTokens;
interface
const
TT_EOF = 1; // returned by NextToken at end of input
TT_PARAM = 4; // "@param" (' ')+ ID
TT_EXCEPTION = 5; // "@exception" (' ')+ ID
TT_ID = 6; // ('a'..'z')+ ; protected rule, only matched inside PARAM/EXCEPTION
TT_STAR = 7; // '*' ; the rule re-types itself to TT_SKIP
TT_JAVADOC_CLOSE = 8; // "*/"
TT_NEWLINE = 9; // CR LF, CR or LF ; the rule re-types itself to TT_SKIP
implementation
end.
@@ -0,0 +1,9 @@
// $Delphi Parser Generator: JavaDocLexer.pas -> TJavaDocLexerTokens.txt$
TJavaDocLexer
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
+286
View File
@@ -0,0 +1,286 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javalexer.g
// ============================================================================
unit JavaLexer;
interface
uses
Classes,
Contnrs,
dpgLexer,
dpgToken,
dpgTokenStreamSelector,
dpgTypes,
JavaTokens,
SysUtils;
type
// =========================================================================
// Class TJavaLexer declaration
// =========================================================================
// Main lexer of the demo (generated from javalexer.g). Every m<RULE> method
// recognises one grammar rule and stores the resulting token in fReturnToken
// (nil when no token is to be produced).
TJavaLexer = class( TdpgLexer)
public
// Token stream selector. mJAVADOC_OPEN calls Selector.Push('docLexer')
// unconditionally after matching "/**", so this has to be assigned and a
// lexer has to be registered under that name before a "/**" is lexed.
Selector : IdpgTokenStreamSelector;
protected // Internals
// Registers the literal 'int' in fLiterals.
procedure initialize; override;
protected // Public grammar rules ("rescoped")
// SEMI: ';'
procedure mSEMI ( pCreate: boolean);
// JAVADOC_OPEN: '/**', followed by Selector.Push('docLexer').
procedure mJAVADOC_OPEN ( pCreate: boolean);
// ID: one or more characters in 'a'..'z'; the type is passed through testLit.
procedure mID ( pCreate: boolean);
// WS: blank, tab or a line break; always skipped (TT_SKIP).
procedure mWS ( pCreate: boolean);
public
// Returns the next non-skipped token; raises EdpgMismatchedChar for a
// character that starts no rule.
function NextToken: IdpgToken; override;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedChar;
// ============================================================================
// mSEMI
// ============================================================================
procedure TJavaLexer.mSEMI( pCreate: boolean);
// Rule SEMI : ';'
// Leaves the recognised token in fReturnToken (nil when pCreate is false).
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_SEMI;
  newToken  := nil;

  match(';');

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mJAVADOC_OPEN
// ============================================================================
procedure TJavaLexer.mJAVADOC_OPEN( pCreate: boolean);
// Rule JAVADOC_OPEN : "/**" { Selector.Push('docLexer'); }
// Leaves the recognised token in fReturnToken (nil when pCreate is false).
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_JAVADOC_OPEN;
  newToken  := nil;

  match('/**');

  // Rule action: push the lexer registered under the name 'docLexer'.
  Selector.Push('docLexer');

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mID
// ============================================================================
procedure TJavaLexer.mID( pCreate: boolean);
// Rule ID (testLiterals = true) : ('a'..'z')+
// Leaves the recognised token in fReturnToken (nil when pCreate is false).
var
  startPos  : integer;
  tokenType : integer;
  newToken  : IdpgToken;
begin
  startPos  := Length( fText) + 1;
  tokenType := TT_ID;
  newToken  := nil;

  // At least one lower-case letter is required; the rest are taken greedily.
  if not (LA(1) in ['a'..'z']) then
    raise EdpgMismatchedChar.Create( LA(1), ['a'..'z'], FileName, Line, Column);
  repeat
    match( ['a'..'z']);
  until not (LA(1) in ['a'..'z']);

  // testLiterals: let the literal table decide the final token type.
  tokenType := testLit( tokenType);

  if pCreate and (tokenType <> TT_SKIP) then
  begin
    newToken := makeToken( tokenType);
    newToken.TokenText := Copy( fText, startPos, Length( fText) - startPos + 1);
  end;
  fReturnToken := newToken;
end;
// ============================================================================
// mWS
// ============================================================================
procedure TJavaLexer.mWS( pCreate: boolean);
// Rule WS : ( ' ' | '\t' | ( '\r' '\n' | '\r' | '\n' ) { newLine; } )
//           { _ttype := TT_SKIP; }
// The rule action always turns the token type into TT_SKIP, so no token is
// ever built here regardless of pCreate; fReturnToken is always left nil.
var
  isCrLf : boolean;
begin
  if LA(1) in [' '] then
    match(' ')
  else if LA(1) in [#9] then
    match(#9)
  else if LA(1) in [#10,#13] then
  begin
    if LA(1) in [#13] then
    begin
      // Look at the second character before consuming the CR.
      isCrLf := LA(2) in [#10];
      match(#13);
      if isCrLf then
        match(#10);
    end
    else if LA(1) in [#10] then
      match(#10)
    else
      raise EdpgMismatchedChar.Create( LA(1), [#10,#13], FileName, Line, Column);
    newLine;
  end
  else
    raise EdpgMismatchedChar.Create( LA(1), [#9..#10,#13,' '], FileName, Line, Column);

  fReturnToken := nil;
end;
// ----------------------------------------------------------------------------
// NextToken
// ----------------------------------------------------------------------------
function TJavaLexer.NextToken : IdpgToken;
// Returns the next token of the main input.
// Dispatches on the first lookahead character, retries while the matched rule
// produced no token (SKIP), returns a TT_EOF token at end of input and raises
// EdpgMismatchedChar for a character that starts no rule. Exceptions raised
// by the rule methods are passed on to the caller unchanged.
var
  startChars : TdpgCharSet;
begin
  // Every character that can begin a token of this lexer (used for the error).
  startChars := [#9..#10,#13,' ','/',';','a'..'z'];
  repeat
    ResetText;
    if LA(1) in [';'] then
    begin
      mSEMI(true);
      result := fReturnToken;
    end
    else if LA(1) in ['/'] then
    begin
      mJAVADOC_OPEN(true);
      result := fReturnToken;
    end
    else if LA(1) in ['a'..'z'] then
    begin
      mID(true);
      result := fReturnToken;
    end
    else if LA(1) in [#9..#10,#13,' '] then
    begin
      mWS(true);
      result := fReturnToken;
    end
    else if LA(1) = EOF_CHAR then
    begin
      uponEof;
      result := TdpgToken.Create(TT_EOF);
    end
    else
      raise EdpgMismatchedChar.Create(LA(1), startChars, FileName, Line, Column);
    // nil means a SKIP token was matched: go round again.
  until result <> nil;
end;
// ----------------------------------------------------------------------------
// initialize
// ----------------------------------------------------------------------------
procedure TJavaLexer.initialize;
// Fills the literal table consulted by testLit (see mID): the text 'int' is
// mapped to token type LT_int, declared as 10 in JavaTokens.
begin
  fLiterals.Add('int', LT_int);
end;
end.
@@ -0,0 +1,25 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javalexer.g
// ============================================================================
// Token type ids used by TJavaLexer (generated; regenerate rather than edit).
// Ids 4..9 repeat the values declared in JavaDocTokens, ids 10..13 are added
// by javalexer.g. TT_SKIP, which the lexer also refers to, is not declared here.
unit JavaTokens;
interface
const
TT_EOF = 1; // returned by NextToken at end of input
TT_PARAM = 4;
TT_EXCEPTION = 5;
TT_ID = 6; // ('a'..'z')+
TT_STAR = 7;
TT_JAVADOC_CLOSE = 8;
TT_NEWLINE = 9;
LT_int = 10; // literal 'int'; TJavaLexer.initialize registers it in fLiterals
TT_SEMI = 11; // ';'
TT_JAVADOC_OPEN = 12; // "/**"
TT_WS = 13; // white space; the rule re-types itself to TT_SKIP
implementation
end.
@@ -0,0 +1,13 @@
// $Delphi Parser Generator: JavaLexer.pas -> TJavaLexerTokens.txt$
TJavaLexer
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
LT_int="int"=10
TT_SEMI=11
TT_JAVADOC_OPEN=12
TT_WS=13
+49
View File
@@ -0,0 +1,49 @@
program demo;
// Multi-lexer demo: parses the file named on the command line with
// TJavaParser. Two lexers share one input state; a token stream selector
// switches from the main lexer ('main') to the Javadoc lexer ('docLexer') on
// "/**" and back again on "*/".
{$APPTYPE CONSOLE}
uses
  Classes,
  SysUtils,
  dpgTokenStreamSelector,
  javaLexer,
  javaDocLexer,
  javaParser;
var
  stm        : TFileStream;
  lexJava    : TJavaLexer;
  lexJavaDoc : TJavaDocLexer;
  parJava    : TJavaParser;
  sel        : TdpgTokenStreamSelector;
begin
  if ParamCount <> 1 then
  begin
    writeln('usage: demo <filename>');
    ExitCode := 1;
    exit;
  end;

  stm := nil;
  try
    try
      stm := TFileStream.Create( ParamStr(1), fmOpenRead);
      sel := TdpgTokenStreamSelector.Create;

      // Both lexers read from the same input state, so the position carries
      // over whenever the selector switches between them.
      lexJava    := TJavaLexer.Create( stm);
      lexJavaDoc := TJavaDocLexer.Create( lexJava.InputState);
      lexJava.Selector    := sel;
      lexJavaDoc.Selector := sel;

      // 'docLexer' is the name TJavaLexer pushes when it sees "/**".
      sel.add( lexJava, 'main');
      sel.add( lexJavaDoc, 'docLexer');
      sel.select( 'main');

      parJava := TJavaParser.Create( sel);
      parJava.input;
    except
      on E: Exception do
      begin
        // Say what went wrong instead of a bare 'Exception...'.
        writeln('Exception: ', E.ClassName, ': ', E.Message);
        ExitCode := 1;
      end;
      else
      begin
        // Raised object that does not descend from Exception.
        writeln('Exception...');
        ExitCode := 1;
      end;
    end;
  finally
    // The stream is created here and released here. Ownership of the lexer,
    // selector and parser objects is not visible from this program (they are
    // handed around as interfaces), so they are deliberately not freed by hand.
    stm.Free;
  end;
end.
@@ -0,0 +1,76 @@
// ============================================================================
// Lexer for the body of a Javadoc comment. It is pushed by the main lexer when
// that one matches "/**" and pops itself again when it matches "*/".
// ============================================================================
unit JavaDocLexer;
uses
{
dpgTokenStreamSelector;
}
lexer TJavaDocLexer;
// k           : the generated NextToken looks at LA(1) and LA(2)
// exportVocab : token vocabulary "JavaDoc"; JavaLexer.g imports it
// filter      : the generated NextToken consumes and ignores any input that
//               matches no rule instead of raising an error
options
{
k = 2;
exportVocab = JavaDoc;
filter = true;
}
// Extra member of the generated class; used by the JAVADOC_CLOSE action below.
memberdecl
{
public
Selector : IdpgTokenStreamSelector;
}
// ----------------------------------------------------------------------------
// @param
//
// The parameter name (ID) becomes part of the PARAM token text.
// ----------------------------------------------------------------------------
PARAM
: "@param" (' ')+ ID
;
// ----------------------------------------------------------------------------
// @exception
//
// The exception name (ID) becomes part of the EXCEPTION token text.
// ----------------------------------------------------------------------------
EXCEPTION
: "@exception" (' ')+ ID
;
// ----------------------------------------------------------------------------
// identifier
//
// Protected: only reachable through PARAM and EXCEPTION, never returned as a
// token of its own by NextToken.
// ----------------------------------------------------------------------------
protected ID
: ('a'..'z')+
;
// ----------------------------------------------------------------------------
// Star
//
// This rule simply prevents JAVADOC_CLOSE from being called for every '*' in
// a comment. Calling JAVADOC_CLOSE will fail for simple '*' and cause an
// exception, which is slow. In other words, the grammar will work without
// this rule, but is slower.
// ----------------------------------------------------------------------------
STAR
: '*' { _ttype := TT_SKIP; }
;
// ----------------------------------------------------------------------------
// JavaDocClose
//
// End of the comment: pops the selector stack.
// ----------------------------------------------------------------------------
JAVADOC_CLOSE
: "*/" { Selector.Pop; }
;
// ----------------------------------------------------------------------------
// NewLine
//
// Calls newLine for every line break and is then skipped.
// ----------------------------------------------------------------------------
NEWLINE
:
(
'\r' '\n'
| '\r'
| '\n'
)
{
newLine;
_ttype := TT_SKIP;
}
;
+68
View File
@@ -0,0 +1,68 @@
// ============================================================================
// Main lexer of the multi-lexer demo. Recognises "int", identifiers and ';'
// and hands over to the lexer registered as 'docLexer' when it sees "/**".
// ============================================================================
unit JavaLexer;
uses
{
dpgTokenStreamSelector;
}
lexer TJavaLexer;
// k           : lookahead depth
// importVocab : starts from the token ids of the "JavaDoc" vocabulary, so the
//               ids 4..9 in the generated JavaTokens unit equal JavaDocTokens
// exportVocab : token vocabulary "Java"; javaParser.g imports it
options
{
k = 2;
importVocab = JavaDoc;
exportVocab = Java;
}
// Literals: generates LT_int and the entry added in TJavaLexer.initialize.
tokens
{
"int";
}
// Extra member of the generated class; used by the JAVADOC_OPEN action below.
memberdecl
{
public
Selector : IdpgTokenStreamSelector;
}
// ----------------------------------------------------------------------------
// Simple tokens
// ----------------------------------------------------------------------------
SEMI : ';';
// ----------------------------------------------------------------------------
// JavaDocOpen
//
// Start of a Javadoc comment: pushes the lexer registered under the name
// 'docLexer' (the demo program registers TJavaDocLexer under that name).
// ----------------------------------------------------------------------------
JAVADOC_OPEN
: "/**" { Selector.Push('docLexer'); }
;
// ----------------------------------------------------------------------------
// Identifier
//
// testLiterals makes the generated rule pass its type through testLit, which
// is how the text "int" ends up as LT_int rather than ID.
// ----------------------------------------------------------------------------
ID
options
{
testLiterals = true;
}
: ('a'..'z')+
;
// ----------------------------------------------------------------------------
// White space
//
// Blank, tab or a line break (newLine is called for line breaks only).
// Always skipped.
// ----------------------------------------------------------------------------
WS
:
(
' '
| '\t'
|
(
'\r' '\n'
| '\r'
| '\n'
)
{
newLine;
}
)
{
_ttype := TT_SKIP;
}
;
+20
View File
@@ -0,0 +1,20 @@
// ============================================================================
// Parser of the multi-lexer demo. Uses the "Java" token vocabulary exported
// by JavaLexer.g, which in turn contains the Javadoc lexer's tokens.
// ============================================================================
unit javaParser;
parser TJavaParser;
options
{
k = 2;
importVocab = Java;
}
// ----------------------------------------------------------------------------
// input
//
// One or more declarations of the form  int <name>;  each optionally
// preceded by a Javadoc comment.
// ----------------------------------------------------------------------------
input
: ( (javadoc)? "int" ID SEMI)+
;
// ----------------------------------------------------------------------------
// javadoc
//
// The tokens of one Javadoc comment: open, an optional @param, an optional
// @exception (in that order), close. All other comment text is dropped by
// the Javadoc lexer and never reaches the parser.
// ----------------------------------------------------------------------------
javadoc
:
JAVADOC_OPEN
(PARAM)?
(EXCEPTION)?
JAVADOC_CLOSE
;
@@ -0,0 +1,91 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javaparser.g
// ============================================================================
unit javaParser;
interface
uses
Classes,
Contnrs,
dpgLLkParser,
dpgToken,
dpgTypes,
javaParserTokens,
SysUtils;
type
// =========================================================================
// Class TJavaParser declaration
// =========================================================================
// Parser generated from javaparser.g; one method per grammar rule.
TJavaParser = class( TdpgLLkParser)
public // Public grammar rules
// input : ( (javadoc)? "int" ID SEMI )+
// Raises EdpgMismatchedToken when not even one declaration can be started.
procedure input ;
// javadoc : JAVADOC_OPEN (PARAM)? (EXCEPTION)? JAVADOC_CLOSE
procedure javadoc ;
end;
implementation
uses
dpgException,
dpgExceptionSemantic,
dpgExceptionMismatchedToken;
// ============================================================================
// input
// ============================================================================
procedure TJavaParser.input;
// Rule input : ( (javadoc)? "int" ID SEMI )+
// Parses one or more declarations; raises EdpgMismatchedToken when the input
// does not start with a declaration or a Javadoc comment.
begin
  // The (...)+ loop needs at least one iteration.
  if not (LA(1) in [LT_int,TT_JAVADOC_OPEN]) then
    raise EdpgMismatchedToken.Create( LT(1), [LT_int,TT_JAVADOC_OPEN], FileName);

  repeat
    // Optional leading Javadoc comment.
    if LA(1) in [TT_JAVADOC_OPEN] then
      javadoc;

    match(LT_int);
    match(TT_ID);
    match(TT_SEMI);
  until not (LA(1) in [LT_int,TT_JAVADOC_OPEN]);
end;
// ============================================================================
// javadoc
// ============================================================================
procedure TJavaParser.javadoc;
// Rule javadoc : JAVADOC_OPEN (PARAM)? (EXCEPTION)? JAVADOC_CLOSE
// Both tags are optional, but when both are present PARAM has to come first.
begin
  match(TT_JAVADOC_OPEN);

  if LA(1) in [TT_PARAM] then
    match(TT_PARAM);

  if LA(1) in [TT_EXCEPTION] then
    match(TT_EXCEPTION);

  match(TT_JAVADOC_CLOSE);
end;
end.
@@ -0,0 +1,25 @@
// ============================================================================
// This file is generated by the Delphi Parser Generator.
// ----------------------------------------------------------------------------
// DPG version: 1.0.0.118r
// Grammar: javaparser.g
// ============================================================================
// Token type ids used by TJavaParser (generated; regenerate rather than edit).
// The values are the same as those declared in the JavaTokens unit.
unit javaParserTokens;
interface
const
TT_EOF = 1;
TT_PARAM = 4; // matched by rule javadoc (optional)
TT_EXCEPTION = 5; // matched by rule javadoc (optional)
TT_ID = 6; // matched by rule input
TT_STAR = 7;
TT_JAVADOC_CLOSE = 8; // matched by rule javadoc
TT_NEWLINE = 9;
LT_int = 10; // literal "int", matched by rule input
TT_SEMI = 11; // matched by rule input
TT_JAVADOC_OPEN = 12; // matched by rule javadoc
TT_WS = 13;
implementation
end.
@@ -0,0 +1,13 @@
// $Delphi Parser Generator: javaParser.pas -> TJavaParserTokens.txt$
TJavaParser
TT_EOF=1
TT_PARAM=4
TT_EXCEPTION=5
TT_ID=6
TT_STAR=7
TT_JAVADOC_CLOSE=8
TT_NEWLINE=9
LT_int="int"=10
TT_SEMI=11
TT_JAVADOC_OPEN=12
TT_WS=13
+20
View File
@@ -0,0 +1,20 @@
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int abc;
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int zzz;
/** a javadoc comment
* @param foo
* @exception bar
* Just a little text for a comment
*/
int xxx;
Binary file not shown.
+36
View File
@@ -0,0 +1,36 @@
\documentclass{zlbook}
\usepackage{minitoc}
%\usepackage[toc,page]{appendix}
%\usepackage{mtcoff}
\title{Delphi Parser Generator \\ user's guide}
\begin{document}
\dominitoc
\dominilof
\dominilot
\pagestyle{empty}
\renewcommand{\thepage}{\roman{page}}
\maketitle
\tableofcontents
%\listoftables
\renewcommand{\thepage}{\thechapter\ - \arabic{page}}
\clearpage
\pagestyle{fancy}
\input{src/intro/intro}
\input{src/start/start}
\input{src/lang/lang}
\input{src/gram/gram}
\input{src/tokens/tokens}
\input{src/rt/rt}
\appendix
\renewcommand{\thepage}{\Alph{chapter} - \arabic{page}}
%\begin{appendices}
\input{src/app/app-grammar}
%\end{appendices}
\end{document}
+625
View File
@@ -0,0 +1,625 @@
\chapter{Grammar of Delphi Parser Generator}
\clearpage \section{Lexical analyzer}
\begin{verbatim}
unit dpgDpgLexer;
lexer TdpgDpgLexer;
options
{
testLiterals = false;
k = 2;
}
tokens
{
"unit";
"uses";
"const";
"type";
"lexer";
"parser";
"options";
"tokens";
"memberdecl";
"memberdef";
"private";
"protected";
"public";
"returns";
"local";
"except";
"finally";
SEMPRED;
USES;
OPTIONS;
TOKENS;
}
// --------------------------------------------------------
// Simple tokens
// --------------------------------------------------------
LPAREN: '(';
RPAREN: ')';
RCURLY: '}';
COLON: ':';
SEMI: ';';
COMMA: ',';
ASSIGN: '=';
IMPLIES: "=>";
QUEST: '?';
PLUS: '+';
STAR: '*';
NOT: '~';
OR: '|';
BANG: '!';
WILDCARD: '.';
RANGE: "..";
// --------------------------------------------------------
// Character literal
// --------------------------------------------------------
CHARLIT
: '\''! (ESC | ~'\'') '\''! ;
// --------------------------------------------------------
// String literal
// --------------------------------------------------------
STRINGLIT
: '"' (ESC | ~'"')* '"' ;
// --------------------------------------------------------
// Integer
// --------------------------------------------------------
INTEGER local
{
i: integer;
v: integer;
}
: DNUMBER
{
v := 0;
for i:=1 to Length( TokenText) do
begin
v := v * 10 + ord( TokenText[i]) - ord('0');
end;
TokenText := IntToStr( v);
}
;
// --------------------------------------------------------
// Argument action
// --------------------------------------------------------
ARGACTION
:
'['!
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| ~']'
)*
']'!
;
// --------------------------------------------------------
// Action
// --------------------------------------------------------
ACTION
:
'{'
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| ~'}'
)*
'}'
( '?'! { _ttype := TT_SEMPRED; } )?
;
// --------------------------------------------------------
// Token ref
// --------------------------------------------------------
TOKENREF
options
{
testLiterals = true;
}
: 'A'..'Z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* ;
// --------------------------------------------------------
// Rule ref
// --------------------------------------------------------
RULEREF
local
{
t: integer;
}
:
t = INT_RULEREF { _ttype := t; }
(
{t = LT_uses}? WS_LOOP ('{' { _ttype := TT_USES; } )?
| {t = LT_options}? WS_LOOP ('{' { _ttype := TT_OPTIONS; } )?
| {t = LT_tokens}? WS_LOOP ('{' { _ttype := TT_TOKENS; } )?
)?
;
// --------------------------------------------------------
// Internal rule ref
// --------------------------------------------------------
protected INT_RULEREF returns [integer]
{
_ttype := TT_RULEREF;
}
: 'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
{
result := TestLiteral( _ttype);
}
;
// --------------------------------------------------------
// COMMENT
// --------------------------------------------------------
COMMENT
: SLCOMMENT { _ttype := TT_SKIP; }
| MLCOMMENT { _ttype := TT_SKIP; }
;
// --------------------------------------------------------
// SLCOMMENT
// --------------------------------------------------------
protected SLCOMMENT
:
"//"
( ~( '\r' | '\n') )*
(
'\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
)
;
// --------------------------------------------------------
// Multi line comment version
// Nested comments aren't allowed!
// --------------------------------------------------------
protected MLCOMMENT
:
"(*"
(
options
{
greedy = false;
}
: '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
| .
)*
"*)"
;
// --------------------------------------------------------
// Numbers
// --------------------------------------------------------
protected DNUMBER: '0'..'9' (DDIGIT)*;
protected DDIGIT: '0'..'9';
// --------------------------------------------------------
// WS
// --------------------------------------------------------
WS
:
(
' '
| '\t' { tab; }
| '\r' '\n' { newLine; }
| '\r' { newLine; }
| '\n' { newLine; }
)
{
_ttype := TT_SKIP;
}
;
// --------------------------------------------------------
// WS_LOOP
// --------------------------------------------------------
protected
WS_LOOP
:
(
options
{
greedy = true;
}
: WS
| COMMENT
)*
;
// --------------------------------------------------------
// Esc
// --------------------------------------------------------
protected
ESC
: '\\'! ( 'r' | 'n' | 't' | '\'' | '"' )
;
\end{verbatim}
\clearpage \section{Parser}
\begin{verbatim}
unit dpgDpgParser;
parser TdpgDpgParser;
options
{
defaultErrorHandler = false;
importVocab = dpgDpgLexer;
k = 2;
}
// --------------------------------------------------------
// grammar
// --------------------------------------------------------
grammar
: "unit" id SEMI
(usesDecl)?
(constDecl)?
(typeDecl)?
classDecl
;
// --------------------------------------------------------
// usesDecl
// --------------------------------------------------------
usesDecl
: USES
(
TOKENREF SEMI
| RULEREF SEMI
)*
RCURLY
;
// --------------------------------------------------------
// constDecl
// --------------------------------------------------------
constDecl
: "const" ACTION
;
// --------------------------------------------------------
// typeDecl
// --------------------------------------------------------
typeDecl
: "type" ACTION
;
// --------------------------------------------------------
// classDecl
// --------------------------------------------------------
classDecl
local
{
grType: integer;
}
:
// --------------------------------------------------
// Determine parser type
// --------------------------------------------------
( "lexer" { grType := 0; }
| "parser" { grType := 1; }
)
// --------------------------------------------------
// get class name
// --------------------------------------------------
id
SEMI
// --------------------------------------------------
// Process optional class "options {...}" clause
// --------------------------------------------------
(classOptions)?
// --------------------------------------------------
// Process optional class "tokens {...}" clause
// But only for lexers.
// --------------------------------------------------
( {grType=0}? classTokens)?
// --------------------------------------------------
// Process optional class "memberDecl {...}" clause
// --------------------------------------------------
(classMemberDecl)?
// --------------------------------------------------
// Well, the rules
// --------------------------------------------------
rules
// --------------------------------------------------
// Process optional class "memberDef {...}" clause
// --------------------------------------------------
(classMemberDef)?
;
// --------------------------------------------------------
// classOptions
// --------------------------------------------------------
classOptions
: OPTIONS ( id ASSIGN optionValue SEMI )* RCURLY
;
// --------------------------------------------------------
// classTokens
// --------------------------------------------------------
classTokens
:
TOKENS
(
TOKENREF SEMI
| STRINGLIT SEMI
)*
RCURLY
;
// --------------------------------------------------------
// classMemberDecl
// --------------------------------------------------------
classMemberDecl
: "memberDecl" ACTION
;
// --------------------------------------------------------
// classMemberDef
// --------------------------------------------------------
classMemberDef
: "memberDef" ACTION
;
// --------------------------------------------------------
// rules
// --------------------------------------------------------
rules
: (rule)*
;
// --------------------------------------------------------
// ruleExceptionBlock
// --------------------------------------------------------
ruleExceptionBlock
: "except" ACTION
| "finally" ACTION
;
// --------------------------------------------------------
// altExceptionBlock
// --------------------------------------------------------
altExceptionBlock
: "except" ACTION
| "finally" ACTION
;
// --------------------------------------------------------
// rule
// --------------------------------------------------------
rule
:
// --------------------------------------------------
// Parse rule scope
// --------------------------------------------------
( "public"
| "protected"
| "private"
)?
// --------------------------------------------------
// Parse rule name
// --------------------------------------------------
id
// --------------------------------------------------
// Optional arguments
// --------------------------------------------------
(ARGACTION)?
// --------------------------------------------------
// Optional return type
// --------------------------------------------------
("returns" ARGACTION)?
// --------------------------------------------------
// Optional rule options
// --------------------------------------------------
(ruleOptions)?
// --------------------------------------------------
// Optional rule local variable declarations
// --------------------------------------------------
("local" ACTION)?
// --------------------------------------------------
// Optional rule init action
// --------------------------------------------------
(ACTION)?
// --------------------------------------------------
// Rule block
// --------------------------------------------------
COLON
block
SEMI
// --------------------------------------------------
// Optional exception handler
// --------------------------------------------------
(ruleExceptionBlock)?
;
// --------------------------------------------------------
// block
// --------------------------------------------------------
block
: alternative (OR alternative)*
;
// --------------------------------------------------------
// alternative
// --------------------------------------------------------
alternative
: (elem)*
(altExceptionBlock)?
;
// --------------------------------------------------------
// elem
// --------------------------------------------------------
elem
: element
;
// --------------------------------------------------------
// element
// --------------------------------------------------------
element
local
{
assignLabel : IdpgToken;
}
{
assignLabel := nil;
}
:
(
id ASSIGN
(id COLON)?
(
RULEREF (ARGACTION)? (BANG)?
| TOKENREF (ARGACTION)?
)
)
|
(assignLabel=id COLON)?
(
RULEREF (ARGACTION)? (BANG)?
| range[assignLabel]
| terminal[assignLabel]
| NOT (notTerminal[assignLabel] | ebnf[ assignLabel, true])
| ebnf[ assignLabel, false]
)
| ACTION
| SEMPRED
;
// --------------------------------------------------------
// range
// --------------------------------------------------------
range [pTokenLabel: IdpgToken]
local
:
CHARLIT RANGE CHARLIT
| (TOKENREF | STRINGLIT) RANGE (TOKENREF | STRINGLIT)
;
// --------------------------------------------------------
// terminal
// --------------------------------------------------------
terminal [pTokenLabel: IdpgToken]
:
CHARLIT (BANG)?
| TOKENREF (BANG)? (ARGACTION)?
| STRINGLIT (BANG)?
| WILDCARD (BANG)?
;
// --------------------------------------------------------
// notTerminal
// --------------------------------------------------------
notTerminal [pTokenLabel: IdpgToken]
: CHARLIT (BANG)?
| TOKENREF (BANG)?
;
// --------------------------------------------------------
// ebnf
// --------------------------------------------------------
ebnf [pTokenLabel: IdpgToken; pTokenNot: boolean]
: LPAREN
(
subRuleOptions (ACTION)? COLON
| ACTION COLON
)?
block
RPAREN
( QUEST
| STAR
| PLUS
| IMPLIES
)?
;
// --------------------------------------------------------
// subruleOptions
// --------------------------------------------------------
subruleOptions
: OPTIONS (id ASSIGN optionValue)* SEMI RCURLY
;
// --------------------------------------------------------
// ruleOptions
// --------------------------------------------------------
ruleOptions
: OPTIONS (id ASSIGN optionValue)* SEMI RCURLY
;
// --------------------------------------------------------
// optionValue
// --------------------------------------------------------
optionValue returns [IdpgToken]
: result=qualifiedId
| result:STRINGLIT
| result:CHARLIT
| result:INTEGER
;
// --------------------------------------------------------
// qualifiedId
// --------------------------------------------------------
qualifiedId returns [IdpgToken]
: id (WILDCARD id)*
;
// --------------------------------------------------------
// id
// --------------------------------------------------------
id returns [IdpgToken]
: result:TOKENREF
| result:RULEREF
;
\end{verbatim}
+77
View File
@@ -0,0 +1,77 @@
\section{Error handling}
All syntactic and semantic errors cause parser exceptions to be thrown. In
particular, the methods used to match tokens in the parser base class (match et
al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to
match characters (match et al) throw analogous exceptions.
\subsection{DPG exception hierarchy}
DPG-generated parsers throw exceptions to signal recognition errors or other
stream problems. All exceptions derive from EdpgException. The hierarchy is the
following:
\begin{verbatim}
EdpgException
EdpgMismatchedChar
EdpgMismatchedToken
EdpgSemantic
\end{verbatim}
\subsubsection{EdpgException} The EdpgException exception class is the base of
all DPG generated exceptions. User defined exceptions must derive from this
class.
\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it
is looking for a character, but finds a different one on the input stream.
\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when
it is looking for a token, but finds a different one on the input token stream.
\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic
predicate.
\subsection{Specifying exception handlers}
DPG allows the specification of exception handlers specific to a given rule or
alternative. The general form of an exception handler specification is:
\begin{verbatim}
... except { code to handle exception }
... finally { code to handle exception }
\end{verbatim}
\subsubsection{Exception handler for a rule}
The exception handler for a rule must be placed after the terminating
semicolon. The handler can be either an §except§ block or a §finally§ block.
The implementation of a rule will be surrounded by a try block.
\begin{verbatim}
r : ...
;
except { handler code }
\end{verbatim}
\subsubsection{Exception handler for an alternative}
The exception handler of an alternative must be the last element of the
alternative. Both exception handler blocks can be used. Every alternative that
has an exception block specified will be surrounded by a §try...except/finally§
block.
\begin{verbatim}
r : alternative_1 ... except { handler code }
| alternative_2 ... finally { handler code }
...
| alternative_n
;
\end{verbatim}
\paragraph{Note:} It is not necessary to define an exception handler for each alternative.
\subsubsection{Default error handler in lexer}
To skip every character that isn't recognized by any public lexer rule, specify
the §filter=true§ option for a lexer. That way, the parser doesn't have to deal
with lexical errors and ask for another token.
View File
+53
View File
@@ -0,0 +1,53 @@
\chapter{Grammars}
\minitoc \clearpage
\section{Structure of a grammar}
The generic structure of a DPG grammar is the following:
\begin{itemize}
\item \emph{unit declaration}
\item \emph{unit sections}
\item \emph{grammar class definition}
\item \emph{grammar class sections}
\end{itemize}
\paragraph{Note:} the order of blocks cannot be changed!
\subsection{Unit declaration}
The $unit$~$declaration$ is always the first block in any DPG grammar. It
specifies the name of the target Pascal unit generated by DPG from the
grammar. The syntax is identical to that of Delphi.
\begin{alltt}
\textbf{unit} \emph{UnitName} ;
\end{alltt}
\subsection{Unit sections}
The $unit$~$sections$ block must follow the $unit$~$declaration$
block if it exists. The members of this block are optional, but
they must appear in the following order:
\begin{itemize}
\item \emph{uses section}
\item \emph{const section}
\item \emph{type section}
\end{itemize}
\subsection{Grammar class definition}
This block defines the type of the grammar class. The possible types are
§lexer§ and §parser§.
\begin{alltt}
\textbf{lexer} \emph{myLexer} ; // define lexer
\end{alltt}
or
\begin{alltt}
\textbf{parser} \emph{myParser} ; // define parser
\end{alltt}
\subsection{Grammar class sections}
This block may contain the following sections in the order
specified:
\begin{itemize}
\item \emph{options section}
\item \emph{tokens section} (only for lexers)
\item \emph{memberdecl section}
\item \emph{rule definitions}
\item \emph{memberdef section}
\end{itemize}
+63
View File
@@ -0,0 +1,63 @@
\chapter{Introduction}
\minitoc \clearpage
\section{Overview}
The Delphi Parser Generator is a language tool which automatically
generates $LL(k)$ parsers in Object Pascal Language based on an
intuitive grammar, similar to §EBNF§. The generated code mimics a
hand-written parser, so that it is easier to debug and leads to
shortened development time compared to state-machine based $LR$ or
DFA/NFA parsers. To compensate for the theoretical limitations of $LL(k)$
parsers, DPG features several powerful extensions enhancing its
functionality far beyond that of standard $LL(k)$ parsers. The
method of syntactic and semantic predicates makes the writing of
meta-parsers simple and routine. The philosophy of DPG is to allow
the programmer maximum control over the parsing process while
eliminating all the routine work.
\section{Features}
\begin{itemize}
\item[-] Delphi code generator for $LL(k)$ lexers and parsers.
\item[-] Intuitive and consistent EBNF-like syntax for both the lexer and the parser generator
resulting in a shallow learning curve.
\item[-] Extremely easy-to-read generated code indistinguishable from hand-written
parsers. The inlined statements are properly indented relative to the surrounding
program code.
\item[-] Syntactic predicates allow for conditional parsing based on
formal syntactic conditions, enhancing the functionality of the $LL(k)$ parsers
considerably.
\item[-] Semantic predicates allow for conditional parsing based on
essentially arbitrary conditions. For example, a DOM-based XML parser is easily
written by semantic predicates using an internal hash-table representation of
the DOM. Using traditional state-machine based parsers (like §YACC§), programmers
often need to delegate parsing tasks to the hand written part of the code. This
burdens them with laborious and error-prone routine work. Semantic predicates
prevent this, since the parser is allowed to use run-time information for the
parsing process dynamically.
\item[-] Actions can be inserted in the rules at every possible place. These actions can be
used for controlling the parsing process with high granularity.
\item[-] All rules may have return values and arguments. Rule arguments add a powerful
metaparsing capability completing the predicate and action mechanism optimally.
\item[-] All rules may have a code initialization section. This special feature is tuned
for Pascal to allow the programmer to declare and initialize local variables for each rule.
\item[-] Many convenient extensions to the plain §BNF§ syntax, such as §(...)§, §(...)?§,
§(...)+§, §(...)*§, which simplify the task of writing grammars and make it less
error-prone.
\item[-] Element complements allow for matching a text not matching a given rule.
\item[-] Element labels are used to directly map rule information
to Pascal variables. They provide a seamless interaction between the
generated and user-written code.
\item[-] Intuitive Graphical User Interface with syntax highlighting, and
project management capabilities.
\end{itemize}
\section{Installation}
The first step in using DPG is to install it in Delphi. However, before using
DPG be sure to read over the License Agreement.
\begin{itemize}
\item[-] run setup.exe and follow the instructions
\item[-] run Delphi and add your DPG run-time library directory to Delphi's
library path. For example, to do this for Delphi 6 select \emph{Tools} §|§ \emph{Environment Options}
on the menu bar. Go to the \emph{Library} tab and add the full path of your DPG run-time directory
to the \emph{Library Path} if you have not already done so.
\end{itemize}
+40
View File
@@ -0,0 +1,40 @@
\section{Atomic production elements}
\subsection{Character literal}
Single characters enclosed in quotes are character literals. A
character literal can only be referred to within a lexer rule. For
example, §'{'§ need not be escaped as you are specifying the
literal character which is to be matched. Meta symbols are used
outside of characters and string literals to specify lexical
structure. Special characters can be specified in a similar way to
§C§ escape sequences. DPG accepts the following escape sequences:
§\n§, §\r§, §\t§, §\'§, §\"§, §\\§. The §#xx§ form is not accepted
by DPG.
\subsection{String literal}
String literals are sequences of characters enclosed in double quotes. The same
escape sequences can be used in string literals as in character literals.
In parser rules, strings represent tokens, and each unique string is assigned
to a token type. Referring to a string within a lexer rule matches the
indicated sequence of characters and is a shorthand notation. For example,
consider the following equivalent lexer rule definitions:
\begin{verbatim}
BEGIN : "begin";
BEGIN : 'b' 'e' 'g' 'i' 'n';
\end{verbatim}
\subsection{Wildcard}
The wildcard §.§ within a parser rule matches any single token;
within a lexer rule it matches any single character.
\subsection{Token reference}
Identifiers beginning with an uppercase letter are treated as
token references. The subsequent characters may be a mixture of
letters, digits or underscores. Referencing a token in a parser
rule implies that you want to recognize a token with the specified
token type. This does not actually call the associated lexer rule
-- the lexical analysis phase delivers a stream of tokens to the
parser. A token reference within a lexer rule implies a method
call to that rule, and carries the same analysis semantics as a
rule reference within a parser. So, you may specify rule arguments
and return values for non-public tokens and for every parser rule.
See the next section on rule references.
+77
View File
@@ -0,0 +1,77 @@
\section{Error handling}
All syntactic and semantic errors cause parser exceptions to be thrown. In
particular, the methods used to match tokens in the parser base class (match et
al) throw §EdpgMismatchedToken§. The methods in the lexer base class used to
match characters (match et al) throw analogous exceptions.
\subsection{DPG exception hierarchy}
DPG-generated parsers throw exceptions to signal recognition errors or other
stream problems. All exceptions derive from EdpgException. The hierarchy is the
following:
\begin{verbatim}
EdpgException
EdpgMismatchedChar
EdpgMismatchedToken
EdpgSemantic
\end{verbatim}
\subsubsection{EdpgException} The EdpgException exception class is the base of
all DPG generated exceptions. User defined exceptions must derive from this
class.
\subsubsection{EdpgMismatchedChar} This exception is thrown by the lexer when it
is looking for a character, but finds a different one on the input stream.
\subsubsection{EdpgMismatchedToken} This exception is thrown by the parser when
it is looking for a token, but finds a different one on the input token stream.
\subsubsection{EdpgSemantic} This exception is thrown by a validating semantic
predicate.
\subsection{Specifying exception handlers}
DPG allows the specification of exception handlers specific to a given rule or
alternative. The general form of an exception handler specification is:
\begin{verbatim}
... except { code to handle exception }
... finally { code to handle exception }
\end{verbatim}
\subsubsection{Exception handler for a rule}
The exception handler for a rule must be placed after the terminating
semicolon. The handler can be either an §except§ block or a §finally§ block.
The implementation of a rule will be surrounded by a try block.
\begin{verbatim}
r : ...
;
except { handler code }
\end{verbatim}
\subsubsection{Exception handler for an alternative}
The exception handler of an alternative must be the last element of the
alternative. Both exception handler blocks can be used. Every alternative that
has an exception block specified will be surrounded by a §try...except/finally§
block.
\begin{verbatim}
r : alternative_1 ... except { handler code }
| alternative_2 ... finally { handler code }
...
| alternative_n
;
\end{verbatim}
\paragraph{Note:} It is not necessary to define an exception handler for each alternative.
\subsubsection{Default error handler in lexer}
To skip every character that isn't recognized by any public lexer rule, specify
the §filter=true§ option for a lexer. That way, the parser doesn't have to deal
with lexical errors and ask for another token.
+265
View File
@@ -0,0 +1,265 @@
\section{Options}
The §options{...}§ section is used to specify options for grammar
elements, i.e. the lexer/parser classes, rules and
subrules. This section is introduced by the §options§ keyword and
contains a series of option/value assignments surrounded by curly
braces.
\subsection{k}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{synopsis:} & set lookahead depth \\
\emph{context:} & parser/lexer class declaration \\
\emph{type:} & integer \\
\emph{default:} & 1
\end{tabular}
\end{table}
For any grammar, the lookahead depth can be specified by using the $k$ option.
\begin{verbatim}
lexer myLexer;
options
{
k = 2;
}
\end{verbatim}
Setting the lookahead depth changes the maximum number of tokens that will be
examined to select alternative productions, and test for exit conditions of the
§EBNF§ constructs §(...)?§, §(...)+§, and §(...)*§. The lookahead analysis is
linear approximate (as opposed to full $LL(k)$ ). Consider this example with
$k=2$:
\begin{verbatim}
r : ( A B | B A )
| A A
;
\end{verbatim}
Full $LL(k)$ analysis would resolve the ambiguity and produce a
lookahead test for the first alternative like:
\begin{verbatim}
if (LA(1)=A and LA(2)=B) or (LA(1)=B and LA(2)=A)
\end{verbatim}
Linear approximate analysis would logically OR the lookahead sets at each
depth, resulting in a test like:
\begin{verbatim}
if (LA(1)=A or LA(1)=B) and (LA(2)=A or LA(2)=B)
\end{verbatim}
This test is ambiguous with the second alternative for the input §{A,A}§.
Therefore, setting the lookahead depth very high tends to yield
diminishing returns in most cases, because the lookahead sets at
large depths will include almost everything. This problem can be
solved using a syntactic predicate.
\subsection{importVocab}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{synopsis:} & set initial grammar vocabulary \\
\emph{context:} & parser/lexer class declaration \\
\emph{type:} & ID \\
\emph{default:} & none
\end{tabular}
\end{table}
The import vocabulary for a grammar class can be specified using the
§importVocab§ option.
\begin{verbatim}
lexer myLexer;
options
{
importVocab = XML;
}
\end{verbatim}
DPG will look for the token exchange file named §XMLTokens.txt§,
and import all the token definitions from it. A parser grammar must
use this option, because without it the parser cannot communicate with
the lexer. A lexer grammar can use this option too, which is useful
when a parser class uses multiple lexers to get tokens from the
input stream. The vocabulary file has an identifier on the first
line that names the token vocabulary. All subsequent lines are of
the form §ID=value§ or §ID="literal"=value§. For example:
\begin{verbatim}
ThocLexer
TT_EOF = 1
TT_LPAREN = 4
TT_RPAREN = 5
LT_const = "const" = 6
\end{verbatim}
The token exchange file is automatically generated by DPG for each grammar.
\paragraph{Note:} you must take care of the order of grammars in a DPG project.
Vocabulary-generating grammars must appear before vocabulary-consuming
grammars.
\subsection{exportVocab}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{synopsis:} & set export grammar vocabulary \\
\emph{context:} & parser/lexer class declaration \\
\emph{type:} & ID \\
\emph{default:} & grammar class name
\end{tabular}
\end{table}
The vocabulary of a grammar is the union of the set of tokens provided by an
§importVocab§ option and the set of tokens and literals defined in the grammar.
\begin{verbatim}
lexer myLexer;
options
{
exportVocab = XML1;
}
\end{verbatim}
If the §exportVocab§ option isn't specified, then DPG will use the
grammar class name to export the vocabulary. DPG generates the
following files for the examp\-le above: §XML1Tokens.txt§ for
token exchange, and XML1Tokens.pas for the grammar class.
\subsection{testLiterals}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{context:} & lexer class declaration, lexer rule \\
\emph{type:} & boolean \\
\emph{default:} & false
\end{tabular}
\end{table}
By default, DPG doesn't generate code to check the literals table
(the table generated for literal strings), because checking the
literals table after each token recognition is expensive. Instead,
it checks string literals in a lexer rule, that can recognize
them. The string literals table contains the strings defined in
the §tokens{...}§ section of a lexer grammar.
\begin{verbatim}
lexer myLexer;
options
{
testLiterals = false;
}
tokens
{
"function";
"procedure";
...
}
ID
options
{
testLiterals = true;
}
: ('A'..'Z' | 'a'..'z')('A'..'Z' | 'a'..'z' | '0'..'9')*
;
\end{verbatim}
In the example above, if the input is matched by the rule §ID§
then the implementation of the rule will check the literals table
for the matched token. If it exists, then the returned token type
will be set to the token type assigned to the string literal in
the literals table. Otherwise the returned token type will remain
unchanged.
It is possible to check the literals table explicitly within an
action using the Test\-Li\-te\-ral method:
\begin{verbatim}
{
...
_ttype := TestLiteral;
_ttype := TestLiteral( _ttype);
...
}
\end{verbatim}
\subsection{caseSensitive}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{context:} & lexer class declaration \\
\emph{type:} & boolean \\
\emph{default:} & false
\end{tabular}
\end{table}
\begin{verbatim}
lexer myLexer;
options
{
caseSensitive = true;
}
\end{verbatim}
When this option is §false§, case is ignored when comparing against character and string literals in the
lexer. The case of the input stream is maintained when stored in the token
objects.
\subsection{filter}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{context:} & lexer class declaration \\
\emph{type:} & boolean / ID \\
\emph{default:} & false
\end{tabular}
\end{table}
\begin{verbatim}
lexer myLexer;
options
{
filter = true;
}
\end{verbatim}
When §true§, the lexer ignores any input not exactly matching one of the public
lexer rules.
Notice that the filter rule must track new-lines in the general
case where the lexer might emit error messages.
When set to a rule name, the filter rule is invoked either when the lookahead
(in nextToken) predicts none of the public lexical rules or when one of those
rules fails. In the latter case, the input is rolled back before attempting
the filter rule. Option §filter=true§ is like having a filter rule such as:
\begin{verbatim}
IGNORE : . ;
\end{verbatim}
\subsection{ignore}
\begin{table}[H]
\small
\begin{tabular}{rl}
\emph{context:} & lexer rule \\
\emph{type:} & ID \\
\emph{default:} & none
\end{tabular}
\end{table}
\begin{verbatim}
lexer myLexer;
options
{
ignore = MyIgnoreRule;
}
\end{verbatim}
Specifies a lexer rule to use as white space between lexical rule
atomic elements (chars, strings, and rule references). The grammar
analysis, and hence the look\-ahead sets, are aware of the
whitespace references.
+41
View File
@@ -0,0 +1,41 @@
\section{Production element operators}
\subsection{Element complement}
The unary not operator $\sim$ may be applied to an atomic element
such as a token identifier. For some token atom §T§, $\sim$§T§
matches any token other than §T§ except end-of-file. Within lexer
rules, $\sim$§'a'§ matches any character other than character
§'a'§. The sequence $\sim$§.§ (``not anything'') is meaningless
and not allowed. Example:
\begin{verbatim}
SL_COMMENT : "//" (~'\n')* '\n';
\end{verbatim}
\subsection{Set complement}
The unary not operator $\sim$ can also be used to construct a
token set or character set by complementing another set. This is
most useful when you want to match tokens or characters until a
certain delimiter set is encountered. Rather than invent a special
syntax for such sets, DPG allows the placement of $\sim$ in front
of a subrule containing only simple elements and no actions. The
simple elements may be token references, token ranges, character
literals, or character ranges. For example:
\begin{verbatim}
SL_COMMENT : "//" (~('\r'|'\n'))* ('\r'|'\n');
\end{verbatim}
\subsection{Range operator}
The binary range operator §..§ is used to define a range of atoms
which may be matched. The expression §c1..c2§ in a lexer matches
characters included in that range. The expression §T..U§ in a
parser matches any token whose token type is inclusively in that
range, which is of dubious value if the token types are generated
externally.
\subsection{Ignore operator}
In lexer grammars, the ignore operator §!§ can be applied to any
atomic production element. It means that the element followed by
the §!§ operator should not appear in the result token. Example:
\begin{verbatim}
STRING : '"'! (~'"')* '"'! ;
\end{verbatim}
+82
View File
@@ -0,0 +1,82 @@
\section{Element labels}
Any atomic production element can be labeled by an identifier (case is insignificant).
For a labelled atomic element, the identifier is used within a semantic action to access
the associated Token object or character. For example,
\begin{verbatim}
assign
: v:ID EQUALS expr SEMI
{
writeln('Assign to ' + v.TokenText);
}
;
\end{verbatim}
\section{EBNF rule elements}
DPG supports the following extended BNF notations:
\begin{table}[H]
\small
\begin{tabular}{ll}
% \hline
§(...) § & -- exactly one occurrence of a subrule \\
§(...)?§ & -- zero or one occurrence of a subrule \\
§(...)+§ & -- one or more occurrences of a subrule \\
§(...)*§ & -- zero or more occurrences of a subrule
% \hline
\end{tabular}
\end{table}
\section{Rule arguments}
Character sequences in square brackets are arguments or return type specifiers.
Square brackets within string and character literals are not argument
delimiters. The arguments within §[]§ must follow the Object Pascal syntax.
\section{Exception handlers}
DPG allows the specification of exception handlers specific to a
given rule or alternative. The general form of an exception
handler specification is:
\begin{verbatim}
... except { code to handle exception }
... finally { code to handle exception }
\end{verbatim}
\subsection{Exception handler for a rule}
The exception handler for a rule must be placed after the
terminating semicolon. The handler can be either an §except§ block
or a §finally§ block. The implementation of a rule will be
surrounded by a try block.
\begin{verbatim}
r : ...
;
except { handler code }
\end{verbatim}
\subsection{Exception handler for an alternative}
The exception handler of an alternative must be the last element
of the alternative. Both exception handler blocks can be used.
Every alternative that has an exception block will be surrounded
by a §try...except/finally§ block.
\begin{verbatim}
r : alternative_1 ... except { handler code }
| alternative_2 ... finally { handler code }
...
| alternative_n
;
\end{verbatim}
\paragraph{Note:} It is not necessary to define an exception handler for each alternative.
\subsection{Default error handler in lexer}
To skip every character that isn't recognized by any public lexer
rule, specify the option §filter=true§ for a lexer. That way, the
parser doesn't have to deal with lexical errors and ask for
another token.
+249
View File
@@ -0,0 +1,249 @@
\section{Sections}
\subsection{unit}
The unit section specifies the unit name of the generated source file.
The syntax is identical to Object Pascal.
\subsection{uses}
The §uses{...}§ section is used to specify the units which must be
included in the interface's uses clause of the generated pascal
unit. Every unit name must be terminated by a semicolon. Repeated
units are included only once.
\begin{verbatim}
uses
{
Classes;
Windows;
}
\end{verbatim}
\subsection{const}
The §const{...}§ section is used to specify items that appear in
the interface's const clause of the generated pascal unit. The
content of this section is copied verbatim into the unit.
\begin{verbatim}
const
{
const1 = 12;
const2 = FOO;
}
\end{verbatim}
\subsection{type}
The §type{...}§ section is used to specify items that appear in
the interface's type clause of the generated pascal unit. The
content of this section is copied verbatim into the unit.
\begin{verbatim}
type
{
TmyType1 = integer;
TmyType2 = array [0..16] of TmyType1;
}
\end{verbatim}
\subsection{options}
The §options{...}§ section contains options for a given grammar
element. Options can be defined for lexer/parser classes, rules
and subrules.
\subsection{tokens}
If you need to define an ``imaginary'' token (i.e. one that has no
corresponding real input symbol) use the §tokens{...}§ section to
define it. You can also define literals in this section.
\begin{verbatim}
tokens
{
"procedure";
"function";
INTEGER;
}
\end{verbatim}
Strings defined in this way are treated just as if you had referenced them in
the parser. The formal syntax is:
\begin{verbatim}
tokenSpecification
: "tokens"
LCURLY
(tokenItem SEMI)*
RCURLY
;
tokenItem
: TOKEN
| STRING
;
\end{verbatim}
The §tokens{...}§ section is only valid in lexer grammars.
\subsection{memberdecl}
The §memberdecl{...}§ section contains additional member
declarations for the grammar class. It allows the expansion of the
grammar class with user defined members, so it is not necessary to
derive new classes from the generated class to implement
additional functionality. The content of this section is copied
verbatim into the class declaration of the generated grammar
class.
\begin{verbatim}
memberdecl
{
procedure proc1;
procedure proc2;
}
\end{verbatim}
\subsection{memberdef}
The §memberdef{...}§ section contains the implementation of the
class's additional functionality. The content of this section is
copied verbatim into the implementation part of the generated
unit. This section may also contain the initialization and
finalization clauses.
\begin{verbatim}
memberdef
{
procedure TmyClass.proc1;
begin
...
end;
procedure TmyClass.proc2;
begin
...
end;
}
\end{verbatim}
\subsection{parser}
Parser rules must be associated with a parser class. Each parser
class specification precedes the options, and rule definitions of
the parser. Grammar files §.g§ can hold only one class definition.
A parser specification in a grammar file looks like:
\begin{verbatim}
unit myParser;
uses... // optional uses {...} section
const... // optional const {...} section
type... // optional type {...} section
parser TmyParser;
options... // optional options {...} section
memberdecl... // optional memberdecl {...} section
parser rules...
memberdef... // optional memberdef {...} section
\end{verbatim}
In the generated code, the parser class results in an Object
Pascal class, and the rules become member methods of the class.
Note that the content of the §memberdecl{...}§ section is copied
verbatim into the class declaration part of the generated parser
class while the content of the §memberdef{...}§ section is copied
after the implementation of the member rules, so the
initialization and finalization clauses of a pascal unit can be
placed in the §memberdef{...}§ section.
\subsection{lexer}
To perform lexical analysis, you need to specify a lexer class that describes
how to break up the input character stream into a stream of tokens. The syntax
is similar to that of a parser class:
\begin{verbatim}
unit myLexer;
uses... // optional uses {...} section
const... // optional const {...} section
type... // optional type {...} section
lexer TmyLexer;
options... // optional options {...} section
tokens... // optional tokens {...} section
memberdecl... // optional memberdecl {...} section
lexer rules...
memberdef... // optional memberdef {...} section
\end{verbatim}
Lexical rules contained within a lexer class become member methods in the
generated class. A lexer grammar may have a §tokens{...}§ section to specify
imaginary tokens and string literals.
\subsection{rule definitions}
The structure of an input stream of atoms is specified by a set of
mutually-referenced rules. Each rule has a name and any of the
following optional attributes: a scope specifier; a set of
arguments; an init-action; a return value; local variable
definitions; an exception handler and an alternative or
alternatives. Each alternative contains a series of elements that
specify what to match and where. Scope can be specified by
private, protected, or public keywords. A rule has public scope by
default. The basic form of a rule is:
\begin{verbatim}
(scope) rulename
: alternative_1
| alternative_2
...
| alternative_n
;
\end{verbatim}
Parameters for a rule can be specified in the following form:
\begin{verbatim}
rulename [formal parameters] : ... ;
\end{verbatim}
If the rule returns a value, its type can be defined with the
returns keyword:
\begin{verbatim}
rulename returns [typename] : ... ;
\end{verbatim}
where §typename§ is a valid Object Pascal type specifier.
Local variables for a rule can be defined in the §local{...}§ section:
\begin{verbatim}
rule
local
{
foo: integer;
bar: string;
}
\end{verbatim}
Init-actions are specified before the colon. Init-actions differ from normal
actions because they are always executed regardless of guess mode.
\begin{verbatim}
rule
{
init-action
}
: ... ;
\end{verbatim}
\paragraph{Parser rules} apply structure to a stream of tokens, whereas
lexer rules apply structure to a stream of characters. Parser
rules, therefore, must not reference cha\-rac\-ter literals.
Double-quoted strings in parser rules are considered to be token
references. Note: all parser rules must begin with a lowercase
letter.
\paragraph{Lexer rules} defined within a lexer grammar must have a name beginning
with an uppercase letter. These rules implicitly match
cha\-rac\-ters on the input stream instead of tokens on the token
stream. Referenced grammar elements include token references
(implicit lexer rule references), cha\-rac\-ters and strings.
Lexer rules are processed in the same manner as parser rules, and
may also specify arguments and return values. A scope specifier
for a lexer rule has special meaning in lexer grammars. In the
generated Object Pascal unit, the lexer class has a §nextToken§
function which is the interface between the lexer and the parser.
This function is synthesized from the public lexer rules. It means
that non-public lexer rules don't modify the prediction logic of
the lexer. They are usually helper rules. If the lexer grammar has
no public rule at all, the §nextToken§ function returns EOF to the
parser.
+79
View File
@@ -0,0 +1,79 @@
\section{Simple production elements}
\subsection{Rule reference}
Identifiers beginning with a lowercase letter are treated as parser
rule references. The subsequent characters may be any letter,
digit, or underscore. Lexical rules may not reference
parser rules. Referencing a rule implies a method call to that
rule at that point in the parse. You may pass parameters and
obtain return values. For example, formal and actual parameters
are specified within square brackets:
\begin{verbatim}
function
: type ID LPAREN args RPAREN block [1]
;
block [scope: integer]
: LCURLY
...
{ (* use arg 'scope' *) }
...
RCURLY
;
\end{verbatim}
Return values that are stored in variables use a simple assignment
notation:
\begin{verbatim}
set
local
{
ids : TStringList;
}
{
ids := nil;
}
: LPAREN ids=idList RPAREN
;
idList returns [TStringList]
{
result := TStringList.Create;
}
: id:ID { result.Add( id.TokenText); }
(
COMMA id:ID
{
result.Add( id.TokenText);
}
)*
;
\end{verbatim}
\subsection{Semantic action}
Actions are blocks of Object Pascal source code enclosed in curly braces. The
code is executed after the preceding production element has been recognized and
before the recognition of the following element. Actions are typically used to
generate out\-put, construct trees, or modify a symbol table. An action's
position dictates when it is recognized relative to the surrounding grammar
elements.
If the action is the first element of a production, it is executed
before any other e\-le\-ment in that production, but only if that
production is predicted by the lookahead.
The first action of an §EBNF§ subrule may be followed by §:§.
Doing so de\-sig\-na\-tes the action as an init-action and
associates it with the subrule as a whole, instead of any
production. It is executed immediately upon entering the subrule,
and is executed even while guessing (testing syntactic
predicates). For example:
\begin{verbatim}
( { init-action} :
{ action of 1st production} production1
| { action of 2nd production} production2
)?
\end{verbatim}
The init-action would be executed regardless of what (if anything)
matched in the optional subrule.
@@ -0,0 +1,49 @@
Delphi Parser Generator (DPG) uses the ASCII character set,
including the letters \emph{A} through \emph{Z} and \emph{a}
through \emph{z}, the digits \emph{0} through \emph{9}, and other
standard characters. It is case sensitive. The space character
(ASCII 32), the tab character (ASCII 9), and the new-line
characters (ASCII 13,10) are called \emph{white-space} characters.
\section{General}
\subsection{Comments}
DPG accepts single and multi-line comments. Single-line comments begin with
§//§ while multi-line (block) comments are enclosed by §(*§~and~§*)§.
\subsection{White Space}
Spaces, tabs, and new-lines (including most used §CR-LF§, §CR§,
§LF§ constructions) are separators in that they separate DPG
symbols, such as identifiers. White space has no additional
significance, i.e.\ the code layout does not play any semantic
role. However, the layout of the embedded Delphi code is preserved
in the ge\-ne\-ra\-ted source files.
\subsection{Symbols}
DPG uses the following punctuation and keywords:
\begin{table}[H]
\small
\begin{center}
\begin{tabular}{|ll|ll|}
\hline
§(...)§ & subrule & §unit§ & unit name \\
§(...)*§ & closure subrule & §uses§ & uses section \\
§(...)+§ & positive closure & §const§ & const section \\
§(...)?§ & optional subrule & §type§ & type section \\
§[...]§ & rule arguments & §lexer§ & lexer class \\
§{...}§ & semantic action & §parser§ & parser class \\
§{...}?§ & semantic predicate & §options§ & options section \\
§(...)=>§ & syntactic predicate & §tokens§ & tokens section \\
§ |§ & alternative operator & §returns§ & rule return value \\
§ ..§ & range operator & §except§ & exception handler \\
§ ~§ & not operator & §finally§ & exception handler \\
§ !§ & ignore operator & §memberdecl§ & member declaration \\
§ .§ & wildcard & §memberdef§ & member definition \\
§ =§ & assignment operator & §local§ & local rule variables \\
§ :§ & label, start rule & & \\
§ ;§ & end rule & & \\
\hline
\end{tabular}
\end{center}
\caption{DPG symbols}
\end{table}
+10
View File
@@ -0,0 +1,10 @@
\chapter{Syntactic elements}
\minitoc
\clearpage
\include{src/lang/lang-syntactic}
\include{src/lang/lang-atomprod}
\include{src/lang/lang-simpprod}
\include{src/lang/lang-prodoper}
\include{src/lang/lang-sect}
\include{src/lang/lang-opt}
\include{src/lang/lang-rest}
+94
View File
@@ -0,0 +1,94 @@
\section{Error handling}
All syntactic and semantic errors throw exceptions. In particular,
the methods used to match tokens in the parser base class (match
etc) throw §EdpgMismatchedToken§. The methods in the lexer base
class used to match characters (match etc) throw exceptions
similarly.
\subsection{DPG exception hierarchy}
DPG-generated parsers throw exceptions to signal recognition
errors or other stream problems. All exceptions derive from
EdpgException. The hierarchy is as follows:
\begin{verbatim}
EdpgException
EdpgMismatchedChar
EdpgMismatchedToken
EdpgSemantic
\end{verbatim}
\subsection{EdpgException}
The §EdpgException§ is the base class for all DPG exceptions. It
defines the following read-only properties:
\begin{alltt}
FileName : string;
Line : integer;
Column : integer;
\end{alltt}
These properties contain information about the location where the exception
occurred.
\subsection{EdpgMismatchedChar}
The §EdpgMismatchedChar§ exception is thrown by the lexer when it
is looking for a character, but finds a different one on the input
stream than expected. It defines the following properties in
addition to those of §EdpgException§.
\begin{alltt}
FoundChar : char;
FoundString : string;
CharSet : TdpgCharSet;
Str : string;
Inverted : boolean;
\end{alltt}
The §FoundChar§ and §FoundString§ properties contain the character
or string that was found on the input stream. The §CharSet§ and
§Str§ properties contain the values which the lexer expected to
find. The §Inverted§ property is set only if the exception came
from a §MatchNot(...)§ operation. In this case, the §CharSet§
property contains the values that the lexer must §not§ match. The
validity of the pro\-per\-ti\-es is shown in the next table,
depending on the kind of exception.
\begin{table}[H]
\small
\begin{center}
\begin{tabular}{lcc}
& Mismatched char & Mismatched string \\
\hline
FoundChar & valid & - \\
FoundString & - & valid \\
CharSet & valid & - \\
Str & - & valid \\
Inverted & valid & - \\
\hline
\end{tabular}
\end{center}
\end{table}
\subsection{EdpgMismatchedToken}
The §EdpgMismatchedToken§ exception is thrown by the parser when
it is looking for a token, but finds a different one on the input
token stream than expected. It defines the following properties in
addition to those of §EdpgException§.
\begin{alltt}
FoundToken : IdpgToken;
TokenSet : TdpgByteSet;
Inverted : boolean;
\end{alltt}
The §FoundToken§ property contains the token the parser received from the
lexer. The §TokenSet§ property contains the values the parser expected to
get. The §Inverted§ property is set only if the exception came from a
§MatchNot(...)§ operation. In this case, the §TokenSet§ property contains the
values the parser must §not§ get.
\subsection{EdpgSemantic}
This exception is thrown by a validating semantic predicate. It
defines the following property in addition to those of
§EdpgException§.
\begin{alltt}
Assert : string;
\end{alltt}
The §Assert§ property contains the validating expression that caused the
exception.
+3
View File
@@ -0,0 +1,3 @@
\chapter{Run-time}
\minitoc \clearpage
\include{src/rt/rt-err}
+200
View File
@@ -0,0 +1,200 @@
\chapter{Getting started}
\minitoc \clearpage
In this chapter, we develop a simple calculator. It accepts integers, the four
arithmetic operators (§+§,§-§,§/§,§*§), and parentheses on its input.
Spaces, tabs and newline characters are treated as white space and used for
separating tokens. Complete expressions must be terminated by semicolons.
\section{Lexical analyzer}
Let us define the calculator's lexer.
\begin{verbatim}
1 unit myLexer;
2
3 lexer TmyLexer;
4 options
5 {
6 exportVocab = myLexer;
7 }
\end{verbatim}
In line §1§ we define the unit name of the generated Pascal source
file for the lexer. In line §3§ we give a name to the lexer class.
If there is an §options§ block for a grammar class, it must follow
the class declaration. Here, we define one option for the lexer:
§exportVocab§. This option tells the DPG that all the token
definitions must be exported to §myLexerTokens.txt§ and
§myLexerTokens.pas§. Grammars can import the generated token names
using the exported §.txt§ files.
\paragraph{Note:} it is not necessary to define the §exportVocab§ option for a
grammar. The file names for the token exchange files are automatically created
using the specified unit name.
Now we define the lexer tokens.
\begin{verbatim}
8 LPAREN: '(';
9 RPAREN: ')';
10 PLUS: '+';
11 MINUS: '-';
12 STAR: '*';
13 SLASH: '/';
14 SEMI: ';';
\end{verbatim}
In lines from §8§ to §14§, there are simple token definitions. Each of them
recognizes one character from the input stream.
\begin{verbatim}
15 INT: ('0'..'9')+ ;
\end{verbatim}
In line §15§, we define a rule to recognize integer numbers. This tells us that
the INT consists of one or more numeric characters.
Now, define a rule to handle white space characters.
\begin{verbatim}
16 WS
17 : '\r' '\n' { _ttype := TT_SKIP; }
18 | '\t' { _ttype := TT_SKIP; }
19 | ' ' { _ttype := TT_SKIP; }
20 ;
\end{verbatim}
Characters surrounded by curly braces are actions. The content of
an action block will be copied verbatim into the generated Pascal
source file. In this example the statement §_ttype := TT_SKIP;§
prevents the §WS§ rule from generating a token, because we don't need
it.
Now the lexer definition is finished. This simple lexer recognizes the relevant
characters and integers, and skips all white space on its input.
\section{Parser}
Now we define the parser.
\begin{verbatim}
1 unit myParser;
2
3 parser TmyParser;
4 options
5 {
6 importVocab = myLexer;
7 }
\end{verbatim}
This part is analogous to lexer definition with one exception. In
line §6§, we import the tokens from a file specified by the
§exportVocab§ option in the lexer grammar. Now, the parser knows
which tokens are to be expected from the lexer.
\begin{verbatim}
8 memberdecl
9 {
10 value: integer;
11 }
\end{verbatim}
In lines from §8§ to §11§, we specify the §memberdecl§ section. This section is
used to define members for the generated parser class. In this example, the §TmyParser§
class will have a member called §value§. We use this member to store the result
of the calculation for the current expression.
Now we define the rules.
\begin{verbatim}
12 calc
13 : (expression SEMI { writeln( value); } )*
14 ;
15
16 expression
17 local
18 {
19 temp : integer;
20 }
21 : term { temp := value; }
22 (
23 PLUS term { temp := temp + value; }
24 | MINUS term { temp := temp - value; }
25 )* { value := temp; }
26 ;
\end{verbatim}
In lines §17..20§, we define a local variable for the rule
§expression§. The following rules are defined in a similar way to
the rule §expression§.
\begin{verbatim}
27 term
28 local
29 {
30 temp : integer;
31 }
32 : factor { temp := value; }
33 (
34 STAR factor { temp := temp * value; }
35 | SLASH factor { temp := temp div value; }
36 )* { value := temp; }
37 ;
38
39 factor
40 local
41 {
42 temp : integer;
43 }
44 : uInt
45 | LPAREN expression RPAREN
46 ;
47
48 uInt
49 : x:INT { value := StrToInt( x.TokenText); }
50 ;
\end{verbatim}
In line §49§, we specified that the rule must have a variable
called 'x' which will contain the INT token. For the moment, it is
enough to know that it has a property §TokenText§ which contains
the text of the recognized token. This property is a string
property, so we have to convert it to an integer, and store it in
the §value§ member variable.
\section{The project}
The following simple project demonstrates how the defined lexer
and parser classes are used.
\begin{verbatim}
1 program calc;
2 {$APPTYPE CONSOLE}
3 uses
4 Classes,
5 SysUtils,
6 myLexer in 'myLexer.pas',
7 myParser in 'myParser.pas';
8
9 var
10 stm: TFileStream;
11 lex: TmyLexer;
12 par: TmyParser;
13
14 begin
15 if ParamCount <> 1 then
16 begin
17 writeln('usage: calc <filename>');
18 exit;
19 end
20 else
21 begin
22 try
23 stm := TFileStream.Create( ParamStr(1),
24 fmOpenRead);
25 lex := TmyLexer.Create(stm);
26 par := TmyParser.Create(lex);
27
28 par.calc;
29 except
30 on EdpgMismatchedToken do
31 writeln('Syntax error');
32 on EdpgMismatchedChar do
33 writeln('Syntax error');
34 end;
35 end;
36
37 stm.Free;
38 par.Free;
39 end.
\end{verbatim}
+281
View File
@@ -0,0 +1,281 @@
\chapter{Tokens}
\minitoc \clearpage
\section{Overview}
Tokens are the basic building blocks of any parser or compiler.
The task of a lexer (lexical analyzer, scanner) is to break up the
input character stream into a stream of tokens. The §nextToken§
method of a lexer passes the next token to the parser, or throws
an exception if the next character on the input stream cannot be
matched by any of the public lexer rules. The §nextToken§ method
is always synthesized from the public lexer rules.
§Tokens§ in DPG are interface pointers. The interface type is §IdpgToken§,
which defines the following properties:
\begin{verbatim}
IdpgToken = interface
...
property TokenText : string;
property TokenType : integer;
property TokenLine : integer;
property TokenColumn : integer;
...
end;
\end{verbatim}
where §TokenText§ is the text matched by the lexer; §TokenType§ is
the type of token assigned to the token by DPG; §TokenLine§ is the
line number where the token starts in the input stream;
§TokenColumn§ is the column number.
Within parser rules, the input token can be accessed via this interface. To
obtain the interface to the recognized token, the reference to the token must
be prefixed by a label. For example,
\begin{verbatim}
...
x:NUMBER
{
...
LogMsg( 'Token: ' + x.TokenText );
LogMsg( 'Type: ' + IntToStr(x.TokenType));
...
}
...
\end{verbatim}
Note: Variables for labels are always generated by DPG, so you should not define
them in the §local{...}§ section of the rule.
\section{Defining tokens}
In DPG, tokens can be defined in the lexer grammars. DPG always
generates a token exchange file that describes all the token types
matched by the lexer. This file can be imported in a parser
grammar, so the lexer and parser have the same token types. Tokens
can be defined either,
\begin{itemize}
\item[-] via lexer rules, or
\item[-] in the lexer's §tokens{...}§ section
\end{itemize}
\subsubsection{Defining a token using a lexer rule}
The commonest method of defining a token is using a lexer rule. In
lexer grammars, every rule is associated with a §TokenType§ which
is determined by DPG at compile time. This value is assigned to
the result token by default, but it can be modified in the given
rule if needed. This is used mostly in rules that need runtime
information to set the type of the result token, but is otherwise
uncommon.
There is one exception: when a rule must not generate a token at all.
This is useful for defining comments or white-spaces for a grammar.
Every lexer rule has a local variable called §_ttype§. If
§_ttype§ has a value of §TT_SKIP§, then the rule won't generate any token. For
example,
\begin{verbatim}
SLCOMMENT : "//" ( ~'\n')* '\n' { _ttype := TT_SKIP; } ;
\end{verbatim}
The following examples are normal lexer rules, and they are typical in lexers:
\begin{verbatim}
LPAREN: '(';
RPAREN: ')';
DIGIT: '0'..'9';
NUMBER: DIGIT (DIGIT)*;
LETTER: 'a'..'z' | 'A'..'Z';
ID: LETTER (LETTER | DIGIT | '_')*;
\end{verbatim}
\subsubsection{Defining a token in the tokens\{...\} section}
Lexer grammars may have a §tokens{...}§ section in the class
declaration. Within this section you can define ``imaginary''
tokens and string literals. These tokens are not ``real'' tokens
and cannot be referenced in lexer rules. ``Imaginary'' tokens are
helpful when a rule can recognize more than one type of token and
defining rules for these tokens would be ambiguous. For example,
\begin{verbatim}
tokens
{
STRING;
CHAR;
}
// ========================================================
// String or char
// ========================================================
STRING_OR_CHAR
: '\'' (~'\'' | '\'' '\'')* '\''
{
if TokenText = '''''' then _ttype := TT_STRING
else if TokenText = '''''''''' then _ttype := TT_CHAR
else if Length( TokenText) > 3 then _ttype := TT_STRING
else _ttype := TT_CHAR;
}
;
\end{verbatim}
The rule §STRING_OR_CHAR§ recognizes both Pascal character literals
and Pascal string literals. The code in the action block decides
which type of token must be created by the rule. Note: These
tokens are ``imaginary'' tokens. Referencing them in lexer
grammars is not possible, because they have no implementation.
Within parser rules, the tokens §STRING§ and §CHAR§ can be
referenced. But §STRING_OR_CHAR§ can't be referenced, because this
rule creates a §STRING§ or a §CHAR§ token.
\paragraph{String literals} in the §tokens{...}§ section are useful when the language
defines keywords. In this case you can list your language's keywords in this
section. They will be put into the lexer's literals table. The lexer will consult
this table in the following cases:
\begin{itemize}
\item[-] if the §testLiterals§ option for the lexer class is true, the lexer checks the
literals table after each recognized token,
\item[-] if the §testLiterals§ option for the lexer class is false, the
check will be executed in rules, that have this option set.
\end{itemize}
If neither lexer rules nor lexer class have this option set, the
lexer's literals table can be explicitly checked via the
§TestLiterals§ method. The advantage of using string literals is
that you can reference them in the parser as they are defined in
the §tokens{...}§ section. For example,
\begin{verbatim}
...
lexer TmyLexer;
options
{
testLiterals = true;
}
tokens
{
...
"function";
"procedure";
...
}
...
parser TmyParser;
rule1 : "function" ID SEMI;
rule2 : "procedure" ID LPAREN args RPAREN SEMI;
...
\end{verbatim}
In the above example we set the §testLiterals§ option to true for the lexer
class. This is not recommended, because the lexer will check the literals table
even if it found a non-string token. Instead, you have to check the table in a
rule that can recognize these literals. For example:
\begin{verbatim}
...
lexer TmyLexer;
...
ID
options
{
testLiterals=true;
}
: ('a'..'z' | 'A'..'Z') ('a'..'z' | 'A'..'Z' | '0'..'9')*
;
\end{verbatim}
Here the literals table will only be consulted in the rule §ID§.
This will improve the lexer's speed. Of course you can set the
§testLiterals§ option to true for as many rules as you want. All
of them will check the literals table.
\paragraph{Note:} The §testLiterals§ option has no effect for lexer rules.
\section{User defined token classes}
By default, DPG uses the §TdpgToken§ class to represent tokens.
This class is derived from §TInterfacedObject§, and implements the
§IdpgToken§ interface. This interface is used across the generated
code. To define a new token class you must derive your new class
from §TdpgToken§, implement your interface to access and
manipulate your object, and finally tell the lexer that it must
create your type of token object instead of the default
§TdpgToken§. After that, within the rules you must obtain the
interface of your class and use it. Let us have a more detailed
look at this:
1. Create a token class:
\begin{verbatim}
ImyToken = interface( IdpgToken)
[a guid definition]
function Get_MyString : string;
procedure Set_MyString( AString: string);
property MyString : string read Get_MyString
write Set_MyString;
end;
TmyToken = class( TdpgToken,
IdpgToken,
ImyToken)
protected
fMyString : string;
function Get_MyString : string;
procedure Set_MyString( AString: string);
public
constructor Create( pType: integer;
pText: string); override;
end;
constructor TmyToken.Create( pType: integer;
pText: string);
begin
inherited;
...
your code here
...
end;
function TmyToken.Get_MyString: string;
begin
result := fMyString;
end;
procedure TmyToken.Set_MyString( AString: string);
begin
fMyString := AString;
end;
\end{verbatim}
2. Tell the lexer that it must use our token class.
\begin{verbatim}
uses myToken;
...
myLexer.TokenClass := TmyToken;
\end{verbatim}
3. Use it in a rule.
\begin{verbatim}
...
parser TmyParser;
rule1
:
"procedure" x:ID (LPAREN params RPAREN)?
{
(x as ImyToken).MyString := 'procid';
}
;
\end{verbatim}
\paragraph{Note:} You must cast the returned interface to your token interface,
because the §makeToken§ method of the lexer always returns an §IdpgToken§
interface and the labels specified to obtain a reference to a token are always
§IdpgToken§ references.
\paragraph{Note:} If you have to do special actions to initialize your token
class, you must have the same constructor as defined in the
example. The §makeToken§ method of the lexer always creates tokens
with this constructor. If you have another kind of constructor for
your token class, it won't be used by the lexer.
\clearpage
+88
View File
@@ -0,0 +1,88 @@
\NeedsTeXFormat{LaTeX2e}
\ProvidesClass{zlbook}
\LoadClass[a4paper,twoside,11pt]{book}
\usepackage{times}
\usepackage{chappg}
\usepackage{here}
\usepackage{alltt}
\usepackage[bookman]{quotchap}
\RequirePackage{shortvrb}
\MakeShortVerb{\§}
% Keep a handle on the current definition of \verbatim so that the
% replacement below can delegate to it.
\let\o@verbatim\verbatim
% Replacement for \verbatim (the begin-code of the verbatim environment):
% ends a paragraph that is still in progress, switches to \small when the
% current font size is \normalsize, and then runs the saved definition.
\def\verbatim{%
\ifhmode\unskip\par\fi
% \nopagebreak % Overridden by list penalty
\ifx\@currsize\normalsize
\small
\fi
\o@verbatim
}
% No paragraph indentation, space between paragraphs
\setlength{\parindent}{0pt}
\setlength{\parskip}{\medskipamount}
% Page numbers are printed as "<chapter> - <page>".
\renewcommand{\thepage}{\thechapter\ - \arabic{page}}
% Running heads and feet are produced with fancyhdr.
\usepackage{fancyhdr}
\pagestyle{fancy}
%\addtolength{\headwidth}{0.5in}
%\addtolength{\headwidth}{\marginparsep}
%\addtolength{\headwidth}{\marginparwidth}
% Left mark carries the chapter title, right mark the numbered section title.
\renewcommand{\chaptermark}[1]{\markboth{#1}{}}
\renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}}
% Start from empty header/footer fields.
\fancyhf{}
%\fancyhead[LE,RO]{\bfseries\thepage}
%\fancyhead[RO]{\bfseries\rightmark}
%\fancyhead[LE]{\bfseries\leftmark}
% Outer header: right mark on odd pages, left mark on even pages.
\fancyhead[RO]{\rightmark}
\fancyhead[LE]{\leftmark}
% Outer footer: page number. An earlier \bfseries variant of these two
% settings was overwritten by this pair and has been dropped as dead code.
\fancyfoot[RO]{\thepage}
\fancyfoot[LE]{\thepage}
% The `plain' page style has no header/footer content and no rules.
\fancypagestyle{plain}{%
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
}
% Rules above the footer and below the header on `fancy' pages.
\renewcommand{\headrulewidth}{0.4pt}
\renewcommand{\footrulewidth}{0.4pt}
%\renewcommand{\normalsize}{\fontsize{10pt}{12pt}\selectfont}
% Replacement for \cleardoublepage: after \clearpage, in two-sided layout and
% only when the page counter is even, one extra page is emitted with the
% `empty' page style (the "intentionally left blank" text is commented out,
% so the page carries no text). In two-column mode a further \newpage follows.
\def\cleardoublepage{\clearpage\if@twoside \ifodd\c@page\else
\hbox{}
\vspace*{\fill}
\begin{center}
% This page intentionally left blank.
\end{center}
\vspace{\fill}
\thispagestyle{empty}
\newpage
\if@twocolumn\hbox{}\newpage\fi\fi\fi}
%\addtolength{\textwidth}{1cm}
% decl environment: typesets its body in \small inside a one-column tabular
% framed by vertical and horizontal rules, shifted left by \leftmargini.
% Optional argument: when non-empty it is handed to \NEWfeature right after
% the tabular is closed; when empty, nothing extra is emitted.
\newenvironment{decl}[1][]%
{\par\small\addvspace{4.5ex plus 1ex}%
\vskip -\parskip
\ifx\relax#1\relax
\def\@decl@date{}%
\else
\def\@decl@date{\NEWfeature{#1}}%
\fi
\noindent\hspace{-\leftmargini}%
\begin{tabular}{|l|}\hline\ignorespaces}%
{\\\hline\end{tabular}\nobreak\@decl@date\par\nobreak
\vspace{2.3ex}\vskip -\parskip}
% \NEWfeature{<text>}: emits a margin note reading "New feature" with <text>
% on the following line, set in \small sans-serif and ragged right.
\newcommand{\NEWfeature}[1]{%
\hskip 1sp \marginpar{\small\sffamily\raggedright
New feature\\#1}}