unit dpgrtl.lexer; interface uses System.Classes, System.SysUtils, System.AnsiStrings, dpgrtl.types, dpgrtl.token, dpgrtl.stringmap, dpgrtl.lexerstate; type TLexer = class( TInterfacedObject, ITokenStream) protected TokenText : AnsiString; fTokenClass : TTokenClass; fLexerState : ILexerState; fLiterals : TStringMap; fCaseSensitive : boolean; fSaveConsumedInput: boolean; fReturnToken : IToken; // ---------------------------------------------------------------------- // Property Handlers // ---------------------------------------------------------------------- private function GetTokenClass: TTokenClass; procedure SetTokenClass( AClass: TTokenClass); // ---------------------------------------------------------------------- // ITokenStream // ---------------------------------------------------------------------- protected function NextToken: IToken; virtual; abstract; protected // ------------------------------------------------------------------- procedure Append( AChar : AnsiChar); overload; procedure Append( AString : AnsiString); overload; // ------------------------------------------------------------------- procedure Consume; procedure ConsumeUntil( AChar : AnsiChar); overload; procedure ConsumeUntil( ASet : TCharSet); overload; // ------------------------------------------------------------------- procedure MatchNot( AChar : AnsiChar); procedure Match( AChar : AnsiChar); overload; procedure Match( ASet : TCharSet); overload; procedure Match( AString : AnsiString); overload; procedure Match( AChar1 : AnsiChar; AChar2 : AnsiChar); overload; // ------------------------------------------------------------------- function TestLiteral( AType : integer): integer; overload; function TestLiteral( AText : AnsiString; AType : integer): integer; overload; // ------------------------------------------------------------------- procedure Tab; virtual; procedure UponEof; virtual; procedure Initialize; virtual; procedure ReportError( e: Exception); virtual; // ------------------------------------------------------------------- function LA(i:integer): AnsiChar; function Mark: integer; procedure Rewind( Pos: integer); procedure Commit; procedure NewLine; procedure ResetText; // ------------------------------------------------------------------- function MakeToken( ATokenType : integer; ATokenText : AnsiString=''): IToken; public constructor Create( ABuffer: ICharBuffer); overload; constructor Create( AState : ILexerState); overload; constructor Create( AStream: TStream); overload; procedure AfterConstruction; override; procedure BeforeDestruction; override; public property SaveConsumedInput : boolean read fSaveConsumedInput write fSaveConsumedInput; property CaseSensitive : boolean read fCaseSensitive write fCaseSensitive; property TokenClass : TTokenClass read GetTokenClass write SetTokenClass; property InputState : ILexerState read fLexerState write fLexerState; property ReturnToken : IToken read fReturnToken write fReturnToken; end; TLexerClass = class of TLexer; implementation uses dpgrtl.exception; { TLexer } // @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Construction/destruction // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Constructor(ICharBuffer) // ================================================================================================ constructor TLexer.Create( ABuffer: ICharBuffer); begin inherited Create; fLexerState := TLexerState.Create( ABuffer); end; // ================================================================================================ // Constructor(ILexerState) // ================================================================================================ constructor TLexer.Create( AState: ILexerState); begin inherited Create; fLexerState := AState end; // ================================================================================================ // Constructor(TStream) // ================================================================================================ constructor TLexer.Create( AStream: TStream); begin inherited Create; fLexerState := TLexerState.Create( AStream) end; // ================================================================================================ // After Construction // ================================================================================================ procedure TLexer.AfterConstruction; begin inherited; fLiterals := TStringMap.Create; fLiterals.CaseSensitive := true; fCaseSensitive := true; fSaveConsumedInput := true; Initialize; end; // ================================================================================================ // Before Destruction // ================================================================================================ procedure TLexer.BeforeDestruction; begin fLexerState := nil; fLiterals.Free; inherited end; // @@@: Property Handlers +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Property Handlers // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Get Token Class // ================================================================================================ function TLexer.GetTokenClass: TTokenClass; begin if fTokenClass = nil then result := TToken else result := fTokenClass end; // ================================================================================================ // Set Token Class // ================================================================================================ procedure TLexer.SetTokenClass( AClass: TTokenClass); begin fTokenClass := AClass end; // @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Internals // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Append(Char) // ================================================================================================ procedure TLexer.Append( AChar: AnsiChar); begin if fSaveConsumedInput then TokenText := TokenText +AChar end; // ================================================================================================ // Append(String) // ================================================================================================ procedure TLexer.Append( AString: AnsiString); begin if fSaveConsumedInput then TokenText := TokenText +AString end; // ================================================================================================ // Consume // ================================================================================================ procedure TLexer.Consume; var chr: AnsiChar; begin chr := LA(1); if fLexerState.Guessing = 0 then begin Append(chr); if chr = #9 then TAB else fLexerState.Column := fLexerState.Column +1; end; fLexerState.InputBuffer.Consume end; // ================================================================================================ // ComsumeUntil(Char) // ================================================================================================ procedure TLexer.ConsumeUntil(AChar: AnsiChar); var chr: AnsiChar; begin chr := LA(1); while (chr <> EOF_CHAR) and (chr <> AChar) do begin Consume; chr := LA(1) end end; // ================================================================================================ // ConsumeUntil(Set) // ================================================================================================ procedure TLexer.ConsumeUntil(ASet: TCharSet); var chr: AnsiChar; begin chr := LA(1); while (chr <> EOF_CHAR) and not(chr in ASet) do begin Consume; chr := LA(1) end end; // ================================================================================================ // Match Not(Char) // ================================================================================================ procedure TLexer.MatchNot(AChar: AnsiChar); var chr : AnsiChar; la1 : AnsiChar; begin if not fCaseSensitive then begin chr := UpCase( AChar); la1 := UpCase( LA(1)); end else begin chr := AChar; la1 := LA(1); end; if la1 = chr then Raise EMismatchedChar.Create( LA(1), [AChar], fLexerState.FileName, fLexerState.Line, fLexerState.Column, true) else Consume end; // ================================================================================================ // Match(Char) // ================================================================================================ procedure TLexer.Match(AChar: AnsiChar); var chr : AnsiChar; la1 : AnsiChar; begin if not fCaseSensitive then begin chr := UpCase( AChar); la1 := UpCase( LA(1)); end else begin chr := AChar; la1 := LA(1); end; if la1 <> chr then Raise EMismatchedChar.Create( LA(1), [AChar], fLexerState.FileName, fLexerState.Line, fLexerState.Column, false) else Consume end; // ================================================================================================ // Match(CharSet) // ================================================================================================ procedure TLexer.Match(ASet: TCharSet); var la1: AnsiChar; begin la1 := LA(1); if not (la1 in ASet) then Raise EMismatchedChar.Create( la1, ASet, fLexerState.FileName, fLexerState.Line, fLexerState.Column, false) else Consume end; // ================================================================================================ // Match(Char..Char) // ================================================================================================ procedure TLexer.Match(AChar1, AChar2: AnsiChar); var chr1: AnsiChar; chr2: AnsiChar; la1 : AnsiChar; begin if not fCaseSensitive then begin chr1 := UpCase( AChar1); chr2 := UpCase( AChar2); la1 := UpCase( LA(1)); end else begin chr1 := AChar1; chr2 := AChar2; la1 := LA(1); end; if not (la1 in [chr1..chr2]) then Raise EMismatchedChar.Create( la1, [chr1..chr2], fLexerState.FileName, fLexerState.Line, fLexerState.Column, false) else Consume end; // ================================================================================================ // Match(string) // ================================================================================================ procedure TLexer.Match(AString: AnsiString); var la1 : AnsiChar; s : AnsiString; str : AnsiString; len : integer; i : integer; begin if not fCaseSensitive then str := AnsiUpperCase( AString) else str := AString; len := Length(str); s := ''; for i:=1 to len do begin if not fCaseSensitive then la1 := UpCase(LA(1)) else la1 := LA(1); s := s + la1; if la1 <> str[i] then Raise EMismatchedChar.Create( s, str, fLexerState.FileName, fLexerState.Line, fLexerState.Column) else Consume end; end; // ================================================================================================ // Test Literal // ================================================================================================ function TLexer.TestLiteral(AType: integer): integer; begin result := fLiterals.Value[TokenText]; if result < 0 then result := AType end; // ================================================================================================ // Test Literal // ================================================================================================ function TLexer.TestLiteral(AText: AnsiString; AType: integer): integer; begin result := fLiterals.Value[AText]; if result < 0 then result := AType end; // ================================================================================================ // Tab // // Advance the current column number by an appropriate amount. If you do not override this // to specify how much to jump for a tab, then tabs are counted as one char. This method is // called from consume(). // // update inputState->column as function of inputState->column and tab stops. For example, // if tab stops are columns 1 and 5 etc... and column is 3, then add 2 to column. // ================================================================================================ procedure TLexer.Tab; begin fLexerState.Column := fLexerState.Column +1; end; // ================================================================================================ // UponEof // // This method is called by YourLexer::nextToken() when the lexer has hit EOF condition. // EOF is NOT a character. This method is not called if EOF is reached during syntactic // predicate evaluation or during evaluation of normal lexical rules, which presumably // would be an IOException. This traps the "normal" EOF condition. // // UponEOF() is called after the complete evaluation of the previous token and only if your // parser asks for another token beyond that last non-EOF token. // // You might want to throw token or char stream exceptions like: "Heh, premature eof" or a // retry stream exception ("I found the end of this file, go back to referencing file"). // ================================================================================================ procedure TLexer.UponEof; begin end; // ================================================================================================ // Initialize // ================================================================================================ procedure TLexer.Initialize; begin end; // ================================================================================================ // ReportError // ================================================================================================ procedure TLexer.ReportError(e: Exception); begin end; // ================================================================================================ // LA // ================================================================================================ function TLexer.LA(i: integer): AnsiChar; begin result := fLexerState.InputBuffer.LA(i) end; // ================================================================================================ // Commit // ================================================================================================ procedure TLexer.Commit; begin fLexerState.InputBuffer.Commit end; // ================================================================================================ // Mark // ================================================================================================ function TLexer.Mark: integer; begin result := fLexerState.InputBuffer.Mark end; // ================================================================================================ // Pos // ================================================================================================ procedure TLexer.Rewind(Pos: integer); begin fLexerState.InputBuffer.Rewind(Pos) end; // ================================================================================================ // ResetText // ================================================================================================ procedure TLexer.ResetText; begin TokenText := ''; with fLexerState do begin TokenStartLine := Line; TokenStartColumn := Column; end end; // ================================================================================================ // NewLine // ================================================================================================ procedure TLexer.NewLine; begin with fLexerState do begin Line := Line +1; Column := 1; end end; // ================================================================================================ // MakeToken // ================================================================================================ function TLexer.MakeToken(ATokenType: integer; ATokenText: AnsiString): IToken; begin if fTokenClass = nil then fTokenClass := TToken; result := fTokenClass.Create( ATokenType, ATokenText); with result, fLexerState do begin TokenLine := Line; TokenColumn := Column end; end; end.