Files
bds.mr.dpg/src.lib/dpglib.CodeGenerator.pas
T
2026-01-03 18:33:48 +01:00

589 lines
20 KiB
ObjectPascal

// ============================================================================
// A CodeGenerator knows about a Grammar data structure and
// a grammar analyzer. The Grammar is walked to generate the
// appropriate code for both a parser and lexer (if present).
// This interface may change slightly so that the lexer is
// itself living inside of a Grammar object (in which case,
// this class generates only one recognizer). The main method
// to call is <tt>gen()</tt>, which initiates all code gen.
//
// The interaction of the code generator with the analyzer is
// simple: each subrule block calls deterministic() before generating
// code for the block. Method deterministic() sets lookahead caches
// in each Alternative object. Technically, a code generator
// doesn't need the grammar analyzer if all lookahead analysis
// is done at runtime, but this would result in a slower parser.
//
// This class provides a set of support utilities to handle argument
// list parsing and so on.
// ============================================================================
unit dpglib.CodeGenerator;
interface
uses
System.Classes,
dpgrtl.types,
dpglib.types;
type
// =========================================================================
// TCodeGenerator
// =========================================================================
TCodeGenerator = class( TInterfacedObject,
ICodeGenerator)
// ------------------------------------------------------------
// class functions (statics)
// ------------------------------------------------------------
class function encodeLexerRuleName( pName: AnsiString): AnsiString;
class function decodeLexerRuleName( pName: AnsiString): AnsiString;
protected
// ------------------------------------------------------------
// Members
// ------------------------------------------------------------
fGrammar : IGrammar;
fTool : ITool;
fAnalyzer : ILLkAnalyzer;
fBehavior : IGrammarBehavior;
fCharFormatter : ICharFormatter;
fFile : AnsiString;
fOutput : TStream;
fTabs : integer;
fBitsetUsed : TByteSet;
DEBUG_GEN : boolean;
fTab : AnsiString; // Tab sequence
fTokenTypesFileSuffix : AnsiString; // Token exchange file suffix
fTokenTypesFileExt : AnsiString; // Token exchange file extension
fLitPrefix : AnsiString; // Literal prefix
fTokPrefix : AnsiString; // Token prefix
fOutDir : AnsiString; // Output directory
fExcDir : AnsiString; // Token exchange dir
public
// ------------------------------------------------------------
// Constructor/destructor
// ------------------------------------------------------------
constructor Create( OutputDir: AnsiString;
ExchangeDir: AnsiString);
destructor Destroy; override;
protected
// ------------------------------------------------------------
// Internals
// ------------------------------------------------------------
procedure indent;
procedure _print( pString : AnsiString);
procedure _println( pString : AnsiString);
procedure print( pString : AnsiString);
procedure println( pString : AnsiString);
procedure printAction( pString : AnsiString);
procedure untabifyLines( pStrings : TStringList);
procedure indentLines( pStrings : TStringList);
procedure genTokenExchange;
protected
// ------------------------------------------------------------
// ICodeGenerator methods
// ------------------------------------------------------------
procedure Gen(pGrammar: IGrammar); overload; virtual;
// procedure Gen(pGrammar: ILexerGrammar); overload; virtual; abstract;
procedure Gen(pElem : IActionElem); overload; virtual; abstract;
procedure Gen(pElem : IAlternativeBlock); overload; virtual; abstract;
procedure Gen(pElem : IBlockEndElem); overload; virtual; abstract;
procedure Gen(pElem : ICharLiteralElem); overload; virtual; abstract;
procedure Gen(pElem : ICharRangeElem); overload; virtual; abstract;
procedure Gen(pElem : IGrammarAtom); overload; virtual; abstract;
procedure Gen(pElem : IOneOrMoreBlock); overload; virtual; abstract;
procedure Gen(pElem : INMBlock); overload; virtual; abstract;
procedure Gen(pElem : IRuleBlock); overload; virtual; abstract;
procedure Gen(pElem : IRuleEndElem); overload; virtual; abstract;
procedure Gen(pElem : IRuleRefElem); overload; virtual; abstract;
procedure Gen(pElem : IStringLiteralElem); overload; virtual; abstract;
procedure Gen(pElem : ISynPredBlock); overload; virtual; abstract;
procedure Gen(pElem : ITokenRefElem); overload; virtual; abstract;
procedure Gen(pElem : ITokenRangeElem); overload; virtual; abstract;
procedure Gen(pElem : ITreeElem); overload; virtual; abstract;
procedure Gen(pElem : IWildCardElem); overload; virtual; abstract;
procedure Gen(pElem : IZeroOrMoreBlock); overload; virtual; abstract;
end;
implementation
uses
dpglib.utils,
System.SysUtils,
System.StrUtils;
// ****************************************************************************
//
// Coonstructor/destructor
//
// ****************************************************************************
// ============================================================================
// Constructor
// ============================================================================
constructor TCodeGenerator.Create( OutputDir : AnsiString;
ExchangeDir : AnsiString);
begin
inherited Create;
fTab := ' ';
fTabs := 0;
fOutDir := OutputDir;
fExcDir := ExchangeDir;
if fOutDir <> '' then if fOutDir[Length(fOutDir)] <> '\' then fOutDir := fOutDir + '\';
if fExcDir <> '' then if fExcDir[Length(fExcDir)] <> '\' then fExcDir := fExcDir + '\';
fTokenTypesFileSuffix := 'Tokens';
fTokenTypesFileExt := '.txt';
fTokPrefix := 'TT_';
fLitPrefix := 'LT_';
end;
// ============================================================================
// Destructor
// ============================================================================
destructor TCodeGenerator.Destroy;
begin
fGrammar := nil;
fTool := nil;
fAnalyzer := nil;
fBehavior := nil;
fCharFormatter := nil;
inherited;
end;
// ****************************************************************************
//
// ICodeGenerator implementation
//
// ****************************************************************************
// ============================================================================
// Gen
// ============================================================================
procedure TCodeGenerator.Gen(pGrammar: IGrammar);
begin
end;
// ****************************************************************************
//
// Internal utility methods
//
// ****************************************************************************
// ============================================================================
// Indent
// ============================================================================
procedure TCodeGenerator.indent;
var
i: integer;
begin
for i:=1 to fTabs do
_print( fTab);
end;
// ============================================================================
// _print
// ============================================================================
procedure TCodeGenerator._print(pString: AnsiString);
begin
if pString = '' then
pString := ' ';
fOutput.Write( pString[1], Length(pString));
end;
// ============================================================================
// _println
// ============================================================================
procedure TCodeGenerator._println(pString: AnsiString);
begin
_print( pString);
_print( #13+#10);
end;
// ============================================================================
// print
// ============================================================================
procedure TCodeGenerator.print(pString: AnsiString);
begin
indent;
_print( pString);
end;
// ============================================================================
// prinln
// ============================================================================
procedure TCodeGenerator.println(pString: AnsiString);
begin
indent;
_println( pString);
end;
// ============================================================================
// printAction
// ============================================================================
procedure TCodeGenerator.printAction(pString: AnsiString);
var
i : integer;
len : integer;
indent: integer;
stm : TStringStream;
lines : TStringList;
line : AnsiString;
begin
// ---------------------------------------------------------------
// Remove the '{' and '}' or '[' and ']' characters from the
// beginning and from the end.
// ---------------------------------------------------------------
pString := Trim( pString);
pString := Copy(pString,2,Length(pString)-2);
// ---------------------------------------------------------------
// Make AnsiStringlist from the action text.
// ---------------------------------------------------------------
len := 0;
stm := TStringStream.Create( pString);
lines := TStringList.Create;
lines.LoadFromStream( stm);
stm.Free;
// ---------------------------------------------------------------
// Check that the line with the opening bracket (ie. the first)
// has code or not.
// ---------------------------------------------------------------
if Trim(lines.Strings[0]) = '' then
indent := 0
else
indent := 1;
// ---------------------------------------------------------------
// Remove empty lines from the beginning
// ---------------------------------------------------------------
while lines.Count > 0 do
begin
if Trim(lines.Strings[0]) = '' then
lines.Delete(0)
else
break;
end;
// ---------------------------------------------------------------
// Remove empty lines from the end
// ---------------------------------------------------------------
while lines.Count > 0 do
begin
if Trim( lines.Strings[lines.Count-1]) = '' then
lines.Delete(lines.Count-1)
else
break;
end;
// ---------------------------------------------------------------
// If there is a TAB character in the action AnsiString, then simply
// emit the AnsiString. Later we can format this too, but not yet.
// ---------------------------------------------------------------
untabifyLines( lines);
indentLines( lines);
// ------------------------------------------------------------
// Print out the lines, but calculate the indent value
// ------------------------------------------------------------
for i:=0 to lines.Count-1 do
begin
line := lines.Strings[i];
println(line);
end;
end;
// ============================================================================
// genTokenExchange
// ============================================================================
procedure TCodeGenerator.genTokenExchange;
var
i : integer;
fName : AnsiString;
name : AnsiString;
value : AnsiString;
ts : ITokenSymbol;
ss : IStringSymbol;
pTM : ITokenManager;
begin
if fGrammar.ExportVocab = '' then
fGrammar.ExportVocab := fGrammar.UnitName;
fName := fExcDir +
fGrammar.ExportVocab +
fTokenTypesFileSuffix +
fTokenTypesFileExt;
try
pTM := fGrammar.TokenManager;
fOutput := TFileStream.Create( fName, fmCreate);
// ------------------------------------------------------------
// Header
// ------------------------------------------------------------
println( '// $Delphi Parser Generator: ' +
ExtractFileName( fGrammar.GrammarFile) +
' -> ' +
fGrammar.GrammarFile +
fTokenTypesFileSuffix +
fTokenTypesFileExt +
'$');
fTabs := 0;
// ------------------------------------------------------------
// Header grammar name
// ------------------------------------------------------------
println( fGrammar.GetClassName);
// ------------------------------------------------------------
// Generate a definition to for each token type.
// ------------------------------------------------------------
for name in pTM.Vocabulary.Keys do
begin
value := IntToStr(pTM.Vocabulary.Items[name]);
ts := pTM.TokenSymbol[name];
ss := nil;
if ts <> nil then
ts.QueryInterface( IStringSymbol, ss);
if name[1] <> '<' then
begin
// ------------------------------------------------------
// Handle AnsiString symbols.
// ------------------------------------------------------
if name[1] = '"' then
begin
if ss <> nil then
begin
if ss.Lbl = '' then
begin
ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2));
// ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2);
//
// ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_');
ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_');
end;
println( ss.Lbl + '=' + name + '=' + value);
end;
end
// ------------------------------------------------------
// Handle token symbols.
// ------------------------------------------------------
else
begin
if ts <> nil then
begin
print( fTokPrefix + name);
if ts.Paraphrase <> '' then
_print( '(' + ts.Paraphrase + ')');
_println( '=' + value);
end
// else
// fGrammar.Tool.Error( 'Undefined token symbol' + name);
end
end
end;
(*
for i:=0 to pTM.Vocabulary.Count -1 do
begin
name := pTM.Vocabulary.Names[i];
value := pTM.Vocabulary.Values[name];
ts := pTM.TokenSymbol[name];
ss := nil;
if ts <> nil then
ts.QueryInterface( IStringSymbol, ss);
if name[1] <> '<' then
begin
// ------------------------------------------------------
// Handle AnsiString symbols.
// ------------------------------------------------------
if name[1] = '"' then
begin
if ss <> nil then
begin
if ss.Lbl = '' then
begin
ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2));
// ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2);
//
// ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_');
// ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_');
ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_');
end;
println( ss.Lbl + '=' + name + '=' + value);
end;
end
// ------------------------------------------------------
// Handle token symbols.
// ------------------------------------------------------
else
begin
if ts <> nil then
begin
print( fTokPrefix + name);
if ts.Paraphrase <> '' then
_print( '(' + ts.Paraphrase + ')');
_println( '=' + value);
end
// else
// fGrammar.Tool.Error( 'Undefined token symbol' + name);
end
end
end
*)
finally
if fOutput <> nil then
FreeAndNil( fOutput);
end;
end;
// ============================================================================
// encodeLexerRuleName
// ============================================================================
class function TCodeGenerator.encodeLexerRuleName( pName: AnsiString): AnsiString;
begin
result := 'm' + pName;
end;
// ============================================================================
// encodeLexerRuleName
// ============================================================================
class function TCodeGenerator.decodeLexerRuleName( pName: AnsiString): AnsiString;
begin
if pName[1] = 'm' then
result := Copy( pName, 2, Length(pName) -1)
else
result := pName;
end;
// ============================================================================
// indent
// ============================================================================
procedure TCodeGenerator.indentLines(pStrings: TStringList);
var
minLead : integer;
i : integer;
lead : integer;
s : AnsiString;
begin
if pStrings.Count = 1 then
begin
pStrings.Strings[0] := Trim( pStrings.Strings[0]);
exit;
end;
// ---------------------------------------------------------------
// search for the shortest leading size.
// ---------------------------------------------------------------
minLead := 1000;
for i:=0 to pStrings.Count -1 do
begin
s := pStrings.Strings[i];
lead := Length(s) - Length( TrimLeft( s));
if Trim(s) <> '' then
if minLead > lead then
minLead := lead;
end;
// ---------------------------------------------------------------
// Now shorten the lines
// ---------------------------------------------------------------
for i:=0 to pStrings.Count -1 do
begin
s := pStrings.Strings[i];
s := Copy( s, minLead +1, length(s));
pStrings.Strings[i] := s;
end;
end;
// ============================================================================
// untabify
// ============================================================================
procedure TCodeGenerator.untabifyLines(pStrings: TStringList);
var
i: integer;
j: integer;
k: integer;
l: integer;
p: integer;
s: AnsiString;
x: AnsiString;
begin
l := Length( fTab);
for i:=0 to pStrings.Count -1 do
begin
s := TrimRight(pStrings.Strings[i]);
pStrings.Strings[i] := s;
if pos( #9, s) > 0 then
begin
x := '';
for j:=1 to Length(s) do
begin
if s[j] <> #9 then
begin
x := x + s[j];
end
else
begin
p := l - (Length(x) mod l);
for k:=1 to p do
x := x + ' ';
end;
end;
pStrings.Strings[i] := x;
end;
end;
end;
end.