589 lines
20 KiB
ObjectPascal
589 lines
20 KiB
ObjectPascal
// ============================================================================
|
|
// A CodeGenerator knows about a Grammar data structure and
|
|
// a grammar analyzer. The Grammar is walked to generate the
|
|
// appropriate code for both a parser and lexer (if present).
|
|
// This interface may change slightly so that the lexer is
|
|
// itself living inside of a Grammar object (in which case,
|
|
// this class generates only one recognizer). The main method
|
|
// to call is <tt>gen()</tt>, which initiates all code gen.
|
|
//
|
|
// The interaction of the code generator with the analyzer is
|
|
// simple: each subrule block calls deterministic() before generating
|
|
// code for the block. Method deterministic() sets lookahead caches
|
|
// in each Alternative object. Technically, a code generator
|
|
// doesn't need the grammar analyzer if all lookahead analysis
|
|
// is done at runtime, but this would result in a slower parser.
|
|
//
|
|
// This class provides a set of support utilities to handle argument
|
|
// list parsing and so on.
|
|
// ============================================================================
|
|
unit dpglib.CodeGenerator;
|
|
|
|
interface
|
|
uses
|
|
System.Classes,
|
|
dpgrtl.types,
|
|
dpglib.types;
|
|
|
|
|
|
type
|
|
// =========================================================================
|
|
// TCodeGenerator
|
|
// =========================================================================
|
|
TCodeGenerator = class( TInterfacedObject,
|
|
ICodeGenerator)
|
|
// ------------------------------------------------------------
|
|
// class functions (statics)
|
|
// ------------------------------------------------------------
|
|
class function encodeLexerRuleName( pName: AnsiString): AnsiString;
|
|
class function decodeLexerRuleName( pName: AnsiString): AnsiString;
|
|
|
|
protected
|
|
// ------------------------------------------------------------
|
|
// Members
|
|
// ------------------------------------------------------------
|
|
fGrammar : IGrammar;
|
|
fTool : ITool;
|
|
fAnalyzer : ILLkAnalyzer;
|
|
fBehavior : IGrammarBehavior;
|
|
fCharFormatter : ICharFormatter;
|
|
|
|
fFile : AnsiString;
|
|
fOutput : TStream;
|
|
|
|
fTabs : integer;
|
|
fBitsetUsed : TByteSet;
|
|
|
|
DEBUG_GEN : boolean;
|
|
|
|
fTab : AnsiString; // Tab sequence
|
|
fTokenTypesFileSuffix : AnsiString; // Token exchange file suffix
|
|
fTokenTypesFileExt : AnsiString; // Token exchange file extension
|
|
|
|
fLitPrefix : AnsiString; // Literal prefix
|
|
fTokPrefix : AnsiString; // Token prefix
|
|
|
|
fOutDir : AnsiString; // Output directory
|
|
fExcDir : AnsiString; // Token exchange dir
|
|
|
|
public
|
|
// ------------------------------------------------------------
|
|
// Constructor/destructor
|
|
// ------------------------------------------------------------
|
|
constructor Create( OutputDir: AnsiString;
|
|
ExchangeDir: AnsiString);
|
|
destructor Destroy; override;
|
|
|
|
protected
|
|
// ------------------------------------------------------------
|
|
// Internals
|
|
// ------------------------------------------------------------
|
|
procedure indent;
|
|
procedure _print( pString : AnsiString);
|
|
procedure _println( pString : AnsiString);
|
|
procedure print( pString : AnsiString);
|
|
procedure println( pString : AnsiString);
|
|
procedure printAction( pString : AnsiString);
|
|
|
|
procedure untabifyLines( pStrings : TStringList);
|
|
procedure indentLines( pStrings : TStringList);
|
|
|
|
procedure genTokenExchange;
|
|
|
|
protected
|
|
// ------------------------------------------------------------
|
|
// ICodeGenerator methods
|
|
// ------------------------------------------------------------
|
|
procedure Gen(pGrammar: IGrammar); overload; virtual;
|
|
// procedure Gen(pGrammar: ILexerGrammar); overload; virtual; abstract;
|
|
procedure Gen(pElem : IActionElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IAlternativeBlock); overload; virtual; abstract;
|
|
procedure Gen(pElem : IBlockEndElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : ICharLiteralElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : ICharRangeElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IGrammarAtom); overload; virtual; abstract;
|
|
procedure Gen(pElem : IOneOrMoreBlock); overload; virtual; abstract;
|
|
procedure Gen(pElem : INMBlock); overload; virtual; abstract;
|
|
procedure Gen(pElem : IRuleBlock); overload; virtual; abstract;
|
|
procedure Gen(pElem : IRuleEndElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IRuleRefElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IStringLiteralElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : ISynPredBlock); overload; virtual; abstract;
|
|
procedure Gen(pElem : ITokenRefElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : ITokenRangeElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : ITreeElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IWildCardElem); overload; virtual; abstract;
|
|
procedure Gen(pElem : IZeroOrMoreBlock); overload; virtual; abstract;
|
|
end;
|
|
|
|
implementation
|
|
uses
|
|
dpglib.utils,
|
|
System.SysUtils,
|
|
System.StrUtils;
|
|
|
|
// ****************************************************************************
|
|
//
|
|
// Coonstructor/destructor
|
|
//
|
|
// ****************************************************************************
|
|
// ============================================================================
|
|
// Constructor
|
|
// ============================================================================
|
|
constructor TCodeGenerator.Create( OutputDir : AnsiString;
|
|
ExchangeDir : AnsiString);
|
|
begin
|
|
inherited Create;
|
|
|
|
fTab := ' ';
|
|
fTabs := 0;
|
|
|
|
fOutDir := OutputDir;
|
|
fExcDir := ExchangeDir;
|
|
|
|
if fOutDir <> '' then if fOutDir[Length(fOutDir)] <> '\' then fOutDir := fOutDir + '\';
|
|
if fExcDir <> '' then if fExcDir[Length(fExcDir)] <> '\' then fExcDir := fExcDir + '\';
|
|
|
|
fTokenTypesFileSuffix := 'Tokens';
|
|
fTokenTypesFileExt := '.txt';
|
|
|
|
fTokPrefix := 'TT_';
|
|
fLitPrefix := 'LT_';
|
|
end;
|
|
|
|
// ============================================================================
|
|
// Destructor
|
|
// ============================================================================
|
|
destructor TCodeGenerator.Destroy;
|
|
begin
|
|
fGrammar := nil;
|
|
fTool := nil;
|
|
fAnalyzer := nil;
|
|
fBehavior := nil;
|
|
fCharFormatter := nil;
|
|
|
|
inherited;
|
|
end;
|
|
|
|
// ****************************************************************************
|
|
//
|
|
// ICodeGenerator implementation
|
|
//
|
|
// ****************************************************************************
|
|
// ============================================================================
|
|
// Gen
|
|
// ============================================================================
|
|
procedure TCodeGenerator.Gen(pGrammar: IGrammar);
|
|
begin
|
|
end;
|
|
|
|
// ****************************************************************************
|
|
//
|
|
// Internal utility methods
|
|
//
|
|
// ****************************************************************************
|
|
// ============================================================================
|
|
// Indent
|
|
// ============================================================================
|
|
procedure TCodeGenerator.indent;
|
|
var
|
|
i: integer;
|
|
|
|
begin
|
|
for i:=1 to fTabs do
|
|
_print( fTab);
|
|
end;
|
|
|
|
// ============================================================================
|
|
// _print
|
|
// ============================================================================
|
|
procedure TCodeGenerator._print(pString: AnsiString);
|
|
begin
|
|
if pString = '' then
|
|
pString := ' ';
|
|
|
|
fOutput.Write( pString[1], Length(pString));
|
|
end;
|
|
|
|
// ============================================================================
|
|
// _println
|
|
// ============================================================================
|
|
procedure TCodeGenerator._println(pString: AnsiString);
|
|
begin
|
|
_print( pString);
|
|
_print( #13+#10);
|
|
end;
|
|
|
|
// ============================================================================
|
|
// print
|
|
// ============================================================================
|
|
procedure TCodeGenerator.print(pString: AnsiString);
|
|
begin
|
|
indent;
|
|
_print( pString);
|
|
end;
|
|
|
|
// ============================================================================
|
|
// prinln
|
|
// ============================================================================
|
|
procedure TCodeGenerator.println(pString: AnsiString);
|
|
begin
|
|
indent;
|
|
_println( pString);
|
|
end;
|
|
|
|
// ============================================================================
|
|
// printAction
|
|
// ============================================================================
|
|
procedure TCodeGenerator.printAction(pString: AnsiString);
|
|
var
|
|
i : integer;
|
|
len : integer;
|
|
indent: integer;
|
|
|
|
stm : TStringStream;
|
|
lines : TStringList;
|
|
line : AnsiString;
|
|
|
|
begin
|
|
// ---------------------------------------------------------------
|
|
// Remove the '{' and '}' or '[' and ']' characters from the
|
|
// beginning and from the end.
|
|
// ---------------------------------------------------------------
|
|
pString := Trim( pString);
|
|
pString := Copy(pString,2,Length(pString)-2);
|
|
|
|
// ---------------------------------------------------------------
|
|
// Make AnsiStringlist from the action text.
|
|
// ---------------------------------------------------------------
|
|
len := 0;
|
|
stm := TStringStream.Create( pString);
|
|
lines := TStringList.Create;
|
|
lines.LoadFromStream( stm);
|
|
stm.Free;
|
|
|
|
// ---------------------------------------------------------------
|
|
// Check that the line with the opening bracket (ie. the first)
|
|
// has code or not.
|
|
// ---------------------------------------------------------------
|
|
if Trim(lines.Strings[0]) = '' then
|
|
indent := 0
|
|
else
|
|
indent := 1;
|
|
|
|
// ---------------------------------------------------------------
|
|
// Remove empty lines from the beginning
|
|
// ---------------------------------------------------------------
|
|
while lines.Count > 0 do
|
|
begin
|
|
if Trim(lines.Strings[0]) = '' then
|
|
lines.Delete(0)
|
|
else
|
|
break;
|
|
end;
|
|
|
|
// ---------------------------------------------------------------
|
|
// Remove empty lines from the end
|
|
// ---------------------------------------------------------------
|
|
while lines.Count > 0 do
|
|
begin
|
|
if Trim( lines.Strings[lines.Count-1]) = '' then
|
|
lines.Delete(lines.Count-1)
|
|
else
|
|
break;
|
|
end;
|
|
|
|
// ---------------------------------------------------------------
|
|
// If there is a TAB character in the action AnsiString, then simply
|
|
// emit the AnsiString. Later we can format this too, but not yet.
|
|
// ---------------------------------------------------------------
|
|
untabifyLines( lines);
|
|
indentLines( lines);
|
|
|
|
// ------------------------------------------------------------
|
|
// Print out the lines, but calculate the indent value
|
|
// ------------------------------------------------------------
|
|
for i:=0 to lines.Count-1 do
|
|
begin
|
|
line := lines.Strings[i];
|
|
println(line);
|
|
end;
|
|
end;
|
|
|
|
// ============================================================================
|
|
// genTokenExchange
|
|
// ============================================================================
|
|
procedure TCodeGenerator.genTokenExchange;
|
|
var
|
|
i : integer;
|
|
fName : AnsiString;
|
|
name : AnsiString;
|
|
value : AnsiString;
|
|
ts : ITokenSymbol;
|
|
ss : IStringSymbol;
|
|
pTM : ITokenManager;
|
|
|
|
begin
|
|
if fGrammar.ExportVocab = '' then
|
|
fGrammar.ExportVocab := fGrammar.UnitName;
|
|
|
|
fName := fExcDir +
|
|
fGrammar.ExportVocab +
|
|
fTokenTypesFileSuffix +
|
|
fTokenTypesFileExt;
|
|
|
|
try
|
|
pTM := fGrammar.TokenManager;
|
|
fOutput := TFileStream.Create( fName, fmCreate);
|
|
|
|
// ------------------------------------------------------------
|
|
// Header
|
|
// ------------------------------------------------------------
|
|
println( '// $Delphi Parser Generator: ' +
|
|
ExtractFileName( fGrammar.GrammarFile) +
|
|
' -> ' +
|
|
fGrammar.GrammarFile +
|
|
fTokenTypesFileSuffix +
|
|
fTokenTypesFileExt +
|
|
'$');
|
|
|
|
fTabs := 0;
|
|
|
|
// ------------------------------------------------------------
|
|
// Header grammar name
|
|
// ------------------------------------------------------------
|
|
println( fGrammar.GetClassName);
|
|
|
|
// ------------------------------------------------------------
|
|
// Generate a definition to for each token type.
|
|
// ------------------------------------------------------------
|
|
for name in pTM.Vocabulary.Keys do
|
|
begin
|
|
value := IntToStr(pTM.Vocabulary.Items[name]);
|
|
ts := pTM.TokenSymbol[name];
|
|
ss := nil;
|
|
|
|
if ts <> nil then
|
|
ts.QueryInterface( IStringSymbol, ss);
|
|
|
|
if name[1] <> '<' then
|
|
begin
|
|
// ------------------------------------------------------
|
|
// Handle AnsiString symbols.
|
|
// ------------------------------------------------------
|
|
if name[1] = '"' then
|
|
begin
|
|
if ss <> nil then
|
|
begin
|
|
if ss.Lbl = '' then
|
|
begin
|
|
ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2));
|
|
|
|
// ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2);
|
|
//
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_');
|
|
|
|
ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_');
|
|
end;
|
|
|
|
println( ss.Lbl + '=' + name + '=' + value);
|
|
end;
|
|
end
|
|
|
|
// ------------------------------------------------------
|
|
// Handle token symbols.
|
|
// ------------------------------------------------------
|
|
else
|
|
begin
|
|
if ts <> nil then
|
|
begin
|
|
print( fTokPrefix + name);
|
|
|
|
if ts.Paraphrase <> '' then
|
|
_print( '(' + ts.Paraphrase + ')');
|
|
|
|
_println( '=' + value);
|
|
end
|
|
// else
|
|
// fGrammar.Tool.Error( 'Undefined token symbol' + name);
|
|
end
|
|
end
|
|
end;
|
|
|
|
(*
|
|
for i:=0 to pTM.Vocabulary.Count -1 do
|
|
begin
|
|
name := pTM.Vocabulary.Names[i];
|
|
value := pTM.Vocabulary.Values[name];
|
|
ts := pTM.TokenSymbol[name];
|
|
ss := nil;
|
|
|
|
if ts <> nil then
|
|
ts.QueryInterface( IStringSymbol, ss);
|
|
|
|
if name[1] <> '<' then
|
|
begin
|
|
// ------------------------------------------------------
|
|
// Handle AnsiString symbols.
|
|
// ------------------------------------------------------
|
|
if name[1] = '"' then
|
|
begin
|
|
if ss <> nil then
|
|
begin
|
|
if ss.Lbl = '' then
|
|
begin
|
|
ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2));
|
|
|
|
// ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2);
|
|
//
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_');
|
|
// ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_');
|
|
|
|
ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_');
|
|
end;
|
|
|
|
println( ss.Lbl + '=' + name + '=' + value);
|
|
end;
|
|
end
|
|
|
|
// ------------------------------------------------------
|
|
// Handle token symbols.
|
|
// ------------------------------------------------------
|
|
else
|
|
begin
|
|
if ts <> nil then
|
|
begin
|
|
print( fTokPrefix + name);
|
|
|
|
if ts.Paraphrase <> '' then
|
|
_print( '(' + ts.Paraphrase + ')');
|
|
|
|
_println( '=' + value);
|
|
end
|
|
// else
|
|
// fGrammar.Tool.Error( 'Undefined token symbol' + name);
|
|
end
|
|
end
|
|
end
|
|
*)
|
|
finally
|
|
if fOutput <> nil then
|
|
FreeAndNil( fOutput);
|
|
end;
|
|
end;
|
|
|
|
// ============================================================================
|
|
// encodeLexerRuleName
|
|
// ============================================================================
|
|
class function TCodeGenerator.encodeLexerRuleName( pName: AnsiString): AnsiString;
|
|
begin
|
|
result := 'm' + pName;
|
|
end;
|
|
|
|
// ============================================================================
|
|
// encodeLexerRuleName
|
|
// ============================================================================
|
|
class function TCodeGenerator.decodeLexerRuleName( pName: AnsiString): AnsiString;
|
|
begin
|
|
if pName[1] = 'm' then
|
|
result := Copy( pName, 2, Length(pName) -1)
|
|
else
|
|
result := pName;
|
|
end;
|
|
|
|
// ============================================================================
|
|
// indent
|
|
// ============================================================================
|
|
procedure TCodeGenerator.indentLines(pStrings: TStringList);
|
|
var
|
|
minLead : integer;
|
|
i : integer;
|
|
lead : integer;
|
|
s : AnsiString;
|
|
|
|
begin
|
|
if pStrings.Count = 1 then
|
|
begin
|
|
pStrings.Strings[0] := Trim( pStrings.Strings[0]);
|
|
exit;
|
|
end;
|
|
|
|
// ---------------------------------------------------------------
|
|
// search for the shortest leading size.
|
|
// ---------------------------------------------------------------
|
|
minLead := 1000;
|
|
|
|
for i:=0 to pStrings.Count -1 do
|
|
begin
|
|
s := pStrings.Strings[i];
|
|
lead := Length(s) - Length( TrimLeft( s));
|
|
|
|
if Trim(s) <> '' then
|
|
if minLead > lead then
|
|
minLead := lead;
|
|
end;
|
|
|
|
// ---------------------------------------------------------------
|
|
// Now shorten the lines
|
|
// ---------------------------------------------------------------
|
|
for i:=0 to pStrings.Count -1 do
|
|
begin
|
|
s := pStrings.Strings[i];
|
|
s := Copy( s, minLead +1, length(s));
|
|
pStrings.Strings[i] := s;
|
|
end;
|
|
end;
|
|
|
|
// ============================================================================
|
|
// untabify
|
|
// ============================================================================
|
|
procedure TCodeGenerator.untabifyLines(pStrings: TStringList);
|
|
var
|
|
i: integer;
|
|
j: integer;
|
|
k: integer;
|
|
l: integer;
|
|
p: integer;
|
|
s: AnsiString;
|
|
x: AnsiString;
|
|
|
|
begin
|
|
l := Length( fTab);
|
|
|
|
for i:=0 to pStrings.Count -1 do
|
|
begin
|
|
s := TrimRight(pStrings.Strings[i]);
|
|
pStrings.Strings[i] := s;
|
|
|
|
|
|
if pos( #9, s) > 0 then
|
|
begin
|
|
x := '';
|
|
|
|
for j:=1 to Length(s) do
|
|
begin
|
|
if s[j] <> #9 then
|
|
begin
|
|
x := x + s[j];
|
|
end
|
|
else
|
|
begin
|
|
p := l - (Length(x) mod l);
|
|
|
|
for k:=1 to p do
|
|
x := x + ' ';
|
|
end;
|
|
end;
|
|
|
|
pStrings.Strings[i] := x;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
end.
|