// ============================================================================ // A CodeGenerator knows about a Grammar data structure and // a grammar analyzer. The Grammar is walked to generate the // appropriate code for both a parser and lexer (if present). // This interface may change slightly so that the lexer is // itself living inside of a Grammar object (in which case, // this class generates only one recognizer). The main method // to call is gen(), which initiates all code gen. // // The interaction of the code generator with the analyzer is // simple: each subrule block calls deterministic() before generating // code for the block. Method deterministic() sets lookahead caches // in each Alternative object. Technically, a code generator // doesn't need the grammar analyzer if all lookahead analysis // is done at runtime, but this would result in a slower parser. // // This class provides a set of support utilities to handle argument // list parsing and so on. // ============================================================================ unit dpglib.CodeGenerator; interface uses System.Classes, dpgrtl.types, dpglib.types; type // ========================================================================= // TCodeGenerator // ========================================================================= TCodeGenerator = class( TInterfacedObject, ICodeGenerator) // ------------------------------------------------------------ // class functions (statics) // ------------------------------------------------------------ class function encodeLexerRuleName( pName: AnsiString): AnsiString; class function decodeLexerRuleName( pName: AnsiString): AnsiString; protected // ------------------------------------------------------------ // Members // ------------------------------------------------------------ fGrammar : IGrammar; fTool : ITool; fAnalyzer : ILLkAnalyzer; fBehavior : IGrammarBehavior; fCharFormatter : ICharFormatter; fFile : AnsiString; fOutput : TStream; fTabs : integer; fBitsetUsed : TByteSet; DEBUG_GEN : boolean; fTab : AnsiString; // Tab sequence fTokenTypesFileSuffix : AnsiString; // Token exchange file suffix fTokenTypesFileExt : AnsiString; // Token exchange file extension fLitPrefix : AnsiString; // Literal prefix fTokPrefix : AnsiString; // Token prefix fOutDir : AnsiString; // Output directory fExcDir : AnsiString; // Token exchange dir public // ------------------------------------------------------------ // Constructor/destructor // ------------------------------------------------------------ constructor Create( OutputDir: AnsiString; ExchangeDir: AnsiString); destructor Destroy; override; protected // ------------------------------------------------------------ // Internals // ------------------------------------------------------------ procedure indent; procedure _print( pString : AnsiString); procedure _println( pString : AnsiString); procedure print( pString : AnsiString); procedure println( pString : AnsiString); procedure printAction( pString : AnsiString); procedure untabifyLines( pStrings : TStringList); procedure indentLines( pStrings : TStringList); procedure genTokenExchange; protected // ------------------------------------------------------------ // ICodeGenerator methods // ------------------------------------------------------------ procedure Gen(pGrammar: IGrammar); overload; virtual; // procedure Gen(pGrammar: ILexerGrammar); overload; virtual; abstract; procedure Gen(pElem : IActionElem); overload; virtual; abstract; procedure Gen(pElem : IAlternativeBlock); overload; virtual; abstract; procedure Gen(pElem : IBlockEndElem); overload; virtual; abstract; procedure Gen(pElem : ICharLiteralElem); overload; virtual; abstract; procedure Gen(pElem : ICharRangeElem); overload; virtual; abstract; procedure Gen(pElem : IGrammarAtom); overload; virtual; abstract; procedure Gen(pElem : IOneOrMoreBlock); overload; virtual; abstract; procedure Gen(pElem : INMBlock); overload; virtual; abstract; procedure Gen(pElem : IRuleBlock); overload; virtual; abstract; procedure Gen(pElem : IRuleEndElem); overload; virtual; abstract; procedure Gen(pElem : IRuleRefElem); overload; virtual; abstract; procedure Gen(pElem : IStringLiteralElem); overload; virtual; abstract; procedure Gen(pElem : ISynPredBlock); overload; virtual; abstract; procedure Gen(pElem : ITokenRefElem); overload; virtual; abstract; procedure Gen(pElem : ITokenRangeElem); overload; virtual; abstract; procedure Gen(pElem : ITreeElem); overload; virtual; abstract; procedure Gen(pElem : IWildCardElem); overload; virtual; abstract; procedure Gen(pElem : IZeroOrMoreBlock); overload; virtual; abstract; end; implementation uses dpglib.utils, System.SysUtils, System.StrUtils; // **************************************************************************** // // Coonstructor/destructor // // **************************************************************************** // ============================================================================ // Constructor // ============================================================================ constructor TCodeGenerator.Create( OutputDir : AnsiString; ExchangeDir : AnsiString); begin inherited Create; fTab := ' '; fTabs := 0; fOutDir := OutputDir; fExcDir := ExchangeDir; if fOutDir <> '' then if fOutDir[Length(fOutDir)] <> '\' then fOutDir := fOutDir + '\'; if fExcDir <> '' then if fExcDir[Length(fExcDir)] <> '\' then fExcDir := fExcDir + '\'; fTokenTypesFileSuffix := 'Tokens'; fTokenTypesFileExt := '.txt'; fTokPrefix := 'TT_'; fLitPrefix := 'LT_'; end; // ============================================================================ // Destructor // ============================================================================ destructor TCodeGenerator.Destroy; begin fGrammar := nil; fTool := nil; fAnalyzer := nil; fBehavior := nil; fCharFormatter := nil; inherited; end; // **************************************************************************** // // ICodeGenerator implementation // // **************************************************************************** // ============================================================================ // Gen // ============================================================================ procedure TCodeGenerator.Gen(pGrammar: IGrammar); begin end; // **************************************************************************** // // Internal utility methods // // **************************************************************************** // ============================================================================ // Indent // ============================================================================ procedure TCodeGenerator.indent; var i: integer; begin for i:=1 to fTabs do _print( fTab); end; // ============================================================================ // _print // ============================================================================ procedure TCodeGenerator._print(pString: AnsiString); begin if pString = '' then pString := ' '; fOutput.Write( pString[1], Length(pString)); end; // ============================================================================ // _println // ============================================================================ procedure TCodeGenerator._println(pString: AnsiString); begin _print( pString); _print( #13+#10); end; // ============================================================================ // print // ============================================================================ procedure TCodeGenerator.print(pString: AnsiString); begin indent; _print( pString); end; // ============================================================================ // prinln // ============================================================================ procedure TCodeGenerator.println(pString: AnsiString); begin indent; _println( pString); end; // ============================================================================ // printAction // ============================================================================ procedure TCodeGenerator.printAction(pString: AnsiString); var i : integer; len : integer; indent: integer; stm : TStringStream; lines : TStringList; line : AnsiString; begin // --------------------------------------------------------------- // Remove the '{' and '}' or '[' and ']' characters from the // beginning and from the end. // --------------------------------------------------------------- pString := Trim( pString); pString := Copy(pString,2,Length(pString)-2); // --------------------------------------------------------------- // Make AnsiStringlist from the action text. // --------------------------------------------------------------- len := 0; stm := TStringStream.Create( pString); lines := TStringList.Create; lines.LoadFromStream( stm); stm.Free; // --------------------------------------------------------------- // Check that the line with the opening bracket (ie. the first) // has code or not. // --------------------------------------------------------------- if Trim(lines.Strings[0]) = '' then indent := 0 else indent := 1; // --------------------------------------------------------------- // Remove empty lines from the beginning // --------------------------------------------------------------- while lines.Count > 0 do begin if Trim(lines.Strings[0]) = '' then lines.Delete(0) else break; end; // --------------------------------------------------------------- // Remove empty lines from the end // --------------------------------------------------------------- while lines.Count > 0 do begin if Trim( lines.Strings[lines.Count-1]) = '' then lines.Delete(lines.Count-1) else break; end; // --------------------------------------------------------------- // If there is a TAB character in the action AnsiString, then simply // emit the AnsiString. Later we can format this too, but not yet. // --------------------------------------------------------------- untabifyLines( lines); indentLines( lines); // ------------------------------------------------------------ // Print out the lines, but calculate the indent value // ------------------------------------------------------------ for i:=0 to lines.Count-1 do begin line := lines.Strings[i]; println(line); end; end; // ============================================================================ // genTokenExchange // ============================================================================ procedure TCodeGenerator.genTokenExchange; var i : integer; fName : AnsiString; name : AnsiString; value : AnsiString; ts : ITokenSymbol; ss : IStringSymbol; pTM : ITokenManager; begin if fGrammar.ExportVocab = '' then fGrammar.ExportVocab := fGrammar.UnitName; fName := fExcDir + fGrammar.ExportVocab + fTokenTypesFileSuffix + fTokenTypesFileExt; try pTM := fGrammar.TokenManager; fOutput := TFileStream.Create( fName, fmCreate); // ------------------------------------------------------------ // Header // ------------------------------------------------------------ println( '// $Delphi Parser Generator: ' + ExtractFileName( fGrammar.GrammarFile) + ' -> ' + fGrammar.GrammarFile + fTokenTypesFileSuffix + fTokenTypesFileExt + '$'); fTabs := 0; // ------------------------------------------------------------ // Header grammar name // ------------------------------------------------------------ println( fGrammar.GetClassName); // ------------------------------------------------------------ // Generate a definition to for each token type. // ------------------------------------------------------------ for name in pTM.Vocabulary.Keys do begin value := IntToStr(pTM.Vocabulary.Items[name]); ts := pTM.TokenSymbol[name]; ss := nil; if ts <> nil then ts.QueryInterface( IStringSymbol, ss); if name[1] <> '<' then begin // ------------------------------------------------------ // Handle AnsiString symbols. // ------------------------------------------------------ if name[1] = '"' then begin if ss <> nil then begin if ss.Lbl = '' then begin ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2)); // ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2); // // ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_'); ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_'); end; println( ss.Lbl + '=' + name + '=' + value); end; end // ------------------------------------------------------ // Handle token symbols. // ------------------------------------------------------ else begin if ts <> nil then begin print( fTokPrefix + name); if ts.Paraphrase <> '' then _print( '(' + ts.Paraphrase + ')'); _println( '=' + value); end // else // fGrammar.Tool.Error( 'Undefined token symbol' + name); end end end; (* for i:=0 to pTM.Vocabulary.Count -1 do begin name := pTM.Vocabulary.Names[i]; value := pTM.Vocabulary.Values[name]; ts := pTM.TokenSymbol[name]; ss := nil; if ts <> nil then ts.QueryInterface( IStringSymbol, ss); if name[1] <> '<' then begin // ------------------------------------------------------ // Handle AnsiString symbols. // ------------------------------------------------------ if name[1] = '"' then begin if ss <> nil then begin if ss.Lbl = '' then begin ss.Lbl := fLitPrefix + StringToID( Copy( name, 2, Length(name)-2)); // ss.Lbl := fLitPrefix + Copy( name, 2, Length(name)-2); // // ss.Lbl := AnsiReplaceStr( ss.LBL, '$', '_DOLLAR_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, '/', '_SLASH_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, ':', '_COLON_'); // ss.Lbl := AnsiReplaceStr( ss.LBL, '.', '_DOT_'); ss.LBL := AnsiReplaceStr( ss.LBL, 'LT__', 'LT_'); end; println( ss.Lbl + '=' + name + '=' + value); end; end // ------------------------------------------------------ // Handle token symbols. // ------------------------------------------------------ else begin if ts <> nil then begin print( fTokPrefix + name); if ts.Paraphrase <> '' then _print( '(' + ts.Paraphrase + ')'); _println( '=' + value); end // else // fGrammar.Tool.Error( 'Undefined token symbol' + name); end end end *) finally if fOutput <> nil then FreeAndNil( fOutput); end; end; // ============================================================================ // encodeLexerRuleName // ============================================================================ class function TCodeGenerator.encodeLexerRuleName( pName: AnsiString): AnsiString; begin result := 'm' + pName; end; // ============================================================================ // encodeLexerRuleName // ============================================================================ class function TCodeGenerator.decodeLexerRuleName( pName: AnsiString): AnsiString; begin if pName[1] = 'm' then result := Copy( pName, 2, Length(pName) -1) else result := pName; end; // ============================================================================ // indent // ============================================================================ procedure TCodeGenerator.indentLines(pStrings: TStringList); var minLead : integer; i : integer; lead : integer; s : AnsiString; begin if pStrings.Count = 1 then begin pStrings.Strings[0] := Trim( pStrings.Strings[0]); exit; end; // --------------------------------------------------------------- // search for the shortest leading size. // --------------------------------------------------------------- minLead := 1000; for i:=0 to pStrings.Count -1 do begin s := pStrings.Strings[i]; lead := Length(s) - Length( TrimLeft( s)); if Trim(s) <> '' then if minLead > lead then minLead := lead; end; // --------------------------------------------------------------- // Now shorten the lines // --------------------------------------------------------------- for i:=0 to pStrings.Count -1 do begin s := pStrings.Strings[i]; s := Copy( s, minLead +1, length(s)); pStrings.Strings[i] := s; end; end; // ============================================================================ // untabify // ============================================================================ procedure TCodeGenerator.untabifyLines(pStrings: TStringList); var i: integer; j: integer; k: integer; l: integer; p: integer; s: AnsiString; x: AnsiString; begin l := Length( fTab); for i:=0 to pStrings.Count -1 do begin s := TrimRight(pStrings.Strings[i]); pStrings.Strings[i] := s; if pos( #9, s) > 0 then begin x := ''; for j:=1 to Length(s) do begin if s[j] <> #9 then begin x := x + s[j]; end else begin p := l - (Length(x) mod l); for k:=1 to p do x := x + ' '; end; end; pStrings.Strings[i] := x; end; end; end; end.