Files
bds.mr.dpg/src.rtl/dpgrtl.lexer.pas
T
2026-01-03 18:32:50 +01:00

594 lines
20 KiB
ObjectPascal

unit dpgrtl.lexer;
interface
uses
System.Classes,
System.SysUtils,
System.AnsiStrings,
dpgrtl.types,
dpgrtl.token,
dpgrtl.stringmap,
dpgrtl.lexerstate;
type
TLexer = class( TInterfacedObject, ITokenStream)
protected
TokenText : AnsiString;
fTokenClass : TTokenClass;
fLexerState : ILexerState;
fLiterals : TStringMap;
fCaseSensitive : boolean;
fSaveConsumedInput: boolean;
fReturnToken : IToken;
// ----------------------------------------------------------------------
// Property Handlers
// ----------------------------------------------------------------------
private
function GetTokenClass: TTokenClass;
procedure SetTokenClass( AClass: TTokenClass);
// ----------------------------------------------------------------------
// ITokenStream
// ----------------------------------------------------------------------
protected
function NextToken: IToken; virtual; abstract;
protected
// -------------------------------------------------------------------
procedure Append( AChar : AnsiChar); overload;
procedure Append( AString : AnsiString); overload;
// -------------------------------------------------------------------
procedure Consume;
procedure ConsumeUntil( AChar : AnsiChar); overload;
procedure ConsumeUntil( ASet : TCharSet); overload;
// -------------------------------------------------------------------
procedure MatchNot( AChar : AnsiChar);
procedure Match( AChar : AnsiChar); overload;
procedure Match( ASet : TCharSet); overload;
procedure Match( AString : AnsiString); overload;
procedure Match( AChar1 : AnsiChar;
AChar2 : AnsiChar); overload;
// -------------------------------------------------------------------
function TestLiteral( AType : integer): integer; overload;
function TestLiteral( AText : AnsiString;
AType : integer): integer; overload;
// -------------------------------------------------------------------
procedure Tab; virtual;
procedure UponEof; virtual;
procedure Initialize; virtual;
procedure ReportError( e: Exception); virtual;
// -------------------------------------------------------------------
function LA(i:integer): AnsiChar;
function Mark: integer;
procedure Rewind( Pos: integer);
procedure Commit;
procedure NewLine;
procedure ResetText;
// -------------------------------------------------------------------
function MakeToken( ATokenType : integer;
ATokenText : AnsiString=''): IToken;
public
constructor Create( ABuffer: ICharBuffer); overload;
constructor Create( AState : ILexerState); overload;
constructor Create( AStream: TStream); overload;
procedure AfterConstruction; override;
procedure BeforeDestruction; override;
public
property SaveConsumedInput : boolean read fSaveConsumedInput
write fSaveConsumedInput;
property CaseSensitive : boolean read fCaseSensitive
write fCaseSensitive;
property TokenClass : TTokenClass read GetTokenClass
write SetTokenClass;
property InputState : ILexerState read fLexerState
write fLexerState;
property ReturnToken : IToken read fReturnToken
write fReturnToken;
end;
TLexerClass = class of TLexer;
implementation
uses
dpgrtl.exception;
{ TLexer }
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction/destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor(ICharBuffer)
// ================================================================================================
constructor TLexer.Create( ABuffer: ICharBuffer);
begin
inherited Create;
fLexerState := TLexerState.Create( ABuffer);
end;
// ================================================================================================
// Constructor(ILexerState)
// ================================================================================================
constructor TLexer.Create( AState: ILexerState);
begin
inherited Create;
fLexerState := AState
end;
// ================================================================================================
// Constructor(TStream)
// ================================================================================================
constructor TLexer.Create( AStream: TStream);
begin
inherited Create;
fLexerState := TLexerState.Create( AStream)
end;
// ================================================================================================
// After Construction
// ================================================================================================
procedure TLexer.AfterConstruction;
begin
inherited;
fLiterals := TStringMap.Create;
fLiterals.CaseSensitive := true;
fCaseSensitive := true;
fSaveConsumedInput := true;
Initialize;
end;
// ================================================================================================
// Before Destruction
// ================================================================================================
procedure TLexer.BeforeDestruction;
begin
fLexerState := nil;
fLiterals.Free;
inherited
end;
// @@@: Property Handlers +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Property Handlers
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Get Token Class
// ================================================================================================
function TLexer.GetTokenClass: TTokenClass;
begin
if fTokenClass = nil
then result := TToken
else result := fTokenClass
end;
// ================================================================================================
// Set Token Class
// ================================================================================================
procedure TLexer.SetTokenClass( AClass: TTokenClass);
begin
fTokenClass := AClass
end;
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Append(Char)
// ================================================================================================
procedure TLexer.Append( AChar: AnsiChar);
begin
if fSaveConsumedInput then
TokenText := TokenText +AChar
end;
// ================================================================================================
// Append(String)
// ================================================================================================
procedure TLexer.Append( AString: AnsiString);
begin
if fSaveConsumedInput then
TokenText := TokenText +AString
end;
// ================================================================================================
// Consume
// ================================================================================================
procedure TLexer.Consume;
var
chr: AnsiChar;
begin
chr := LA(1);
if fLexerState.Guessing = 0 then
begin
Append(chr);
if chr = #9
then TAB
else fLexerState.Column := fLexerState.Column +1;
end;
fLexerState.InputBuffer.Consume
end;
// ================================================================================================
// ComsumeUntil(Char)
// ================================================================================================
procedure TLexer.ConsumeUntil(AChar: AnsiChar);
var
chr: AnsiChar;
begin
chr := LA(1);
while (chr <> EOF_CHAR) and (chr <> AChar) do
begin
Consume;
chr := LA(1)
end
end;
// ================================================================================================
// ConsumeUntil(Set)
// ================================================================================================
procedure TLexer.ConsumeUntil(ASet: TCharSet);
var
chr: AnsiChar;
begin
chr := LA(1);
while (chr <> EOF_CHAR) and not(chr in ASet) do
begin
Consume;
chr := LA(1)
end
end;
// ================================================================================================
// Match Not(Char)
// ================================================================================================
procedure TLexer.MatchNot(AChar: AnsiChar);
var
chr : AnsiChar;
la1 : AnsiChar;
begin
if not fCaseSensitive then
begin
chr := UpCase( AChar);
la1 := UpCase( LA(1));
end
else begin
chr := AChar;
la1 := LA(1);
end;
if la1 = chr then
Raise EMismatchedChar.Create( LA(1),
[AChar],
fLexerState.FileName,
fLexerState.Line,
fLexerState.Column,
true)
else
Consume
end;
// ================================================================================================
// Match(Char)
// ================================================================================================
procedure TLexer.Match(AChar: AnsiChar);
var
chr : AnsiChar;
la1 : AnsiChar;
begin
if not fCaseSensitive then
begin
chr := UpCase( AChar);
la1 := UpCase( LA(1));
end
else begin
chr := AChar;
la1 := LA(1);
end;
if la1 <> chr then
Raise EMismatchedChar.Create( LA(1),
[AChar],
fLexerState.FileName,
fLexerState.Line,
fLexerState.Column,
false)
else
Consume
end;
// ================================================================================================
// Match(CharSet)
// ================================================================================================
procedure TLexer.Match(ASet: TCharSet);
var
la1: AnsiChar;
begin
la1 := LA(1);
if not (la1 in ASet) then
Raise EMismatchedChar.Create( la1,
ASet,
fLexerState.FileName,
fLexerState.Line,
fLexerState.Column,
false)
else
Consume
end;
// ================================================================================================
// Match(Char..Char)
// ================================================================================================
procedure TLexer.Match(AChar1, AChar2: AnsiChar);
var
chr1: AnsiChar;
chr2: AnsiChar;
la1 : AnsiChar;
begin
if not fCaseSensitive then
begin
chr1 := UpCase( AChar1);
chr2 := UpCase( AChar2);
la1 := UpCase( LA(1));
end
else begin
chr1 := AChar1;
chr2 := AChar2;
la1 := LA(1);
end;
if not (la1 in [chr1..chr2]) then
Raise EMismatchedChar.Create( la1,
[chr1..chr2],
fLexerState.FileName,
fLexerState.Line,
fLexerState.Column,
false)
else
Consume
end;
// ================================================================================================
// Match(string)
// ================================================================================================
procedure TLexer.Match(AString: AnsiString);
var
la1 : AnsiChar;
s : AnsiString;
str : AnsiString;
len : integer;
i : integer;
begin
if not fCaseSensitive
then str := AnsiUpperCase( AString)
else str := AString;
len := Length(str);
s := '';
for i:=1 to len do
begin
if not fCaseSensitive
then la1 := UpCase(LA(1))
else la1 := LA(1);
s := s + la1;
if la1 <> str[i] then
Raise EMismatchedChar.Create( s,
str,
fLexerState.FileName,
fLexerState.Line,
fLexerState.Column)
else
Consume
end;
end;
// ================================================================================================
// Test Literal
// ================================================================================================
function TLexer.TestLiteral(AType: integer): integer;
begin
result := fLiterals.Value[TokenText];
if result < 0 then
result := AType
end;
// ================================================================================================
// Test Literal
// ================================================================================================
function TLexer.TestLiteral(AText: AnsiString; AType: integer): integer;
begin
result := fLiterals.Value[AText];
if result < 0 then
result := AType
end;
// ================================================================================================
// Tab
//
// Advance the current column number by an appropriate amount. If you do not override this
// to specify how much to jump for a tab, then tabs are counted as one char. This method is
// called from consume().
//
// update inputState->column as function of inputState->column and tab stops. For example,
// if tab stops are columns 1 and 5 etc... and column is 3, then add 2 to column.
// ================================================================================================
procedure TLexer.Tab;
begin
fLexerState.Column := fLexerState.Column +1;
end;
// ================================================================================================
// UponEof
//
// This method is called by YourLexer::nextToken() when the lexer has hit EOF condition.
// EOF is NOT a character. This method is not called if EOF is reached during syntactic
// predicate evaluation or during evaluation of normal lexical rules, which presumably
// would be an IOException. This traps the "normal" EOF condition.
//
// UponEOF() is called after the complete evaluation of the previous token and only if your
// parser asks for another token beyond that last non-EOF token.
//
// You might want to throw token or char stream exceptions like: "Heh, premature eof" or a
// retry stream exception ("I found the end of this file, go back to referencing file").
// ================================================================================================
procedure TLexer.UponEof;
begin
end;
// ================================================================================================
// Initialize
// ================================================================================================
procedure TLexer.Initialize;
begin
end;
// ================================================================================================
// ReportError
// ================================================================================================
procedure TLexer.ReportError(e: Exception);
begin
end;
// ================================================================================================
// LA
// ================================================================================================
function TLexer.LA(i: integer): AnsiChar;
begin
result := fLexerState.InputBuffer.LA(i)
end;
// ================================================================================================
// Commit
// ================================================================================================
procedure TLexer.Commit;
begin
fLexerState.InputBuffer.Commit
end;
// ================================================================================================
// Mark
// ================================================================================================
function TLexer.Mark: integer;
begin
result := fLexerState.InputBuffer.Mark
end;
// ================================================================================================
// Pos
// ================================================================================================
procedure TLexer.Rewind(Pos: integer);
begin
fLexerState.InputBuffer.Rewind(Pos)
end;
// ================================================================================================
// ResetText
// ================================================================================================
procedure TLexer.ResetText;
begin
TokenText := '';
with fLexerState do
begin
TokenStartLine := Line;
TokenStartColumn := Column;
end
end;
// ================================================================================================
// NewLine
// ================================================================================================
procedure TLexer.NewLine;
begin
with fLexerState do
begin
Line := Line +1;
Column := 1;
end
end;
// ================================================================================================
// MakeToken
// ================================================================================================
function TLexer.MakeToken(ATokenType: integer; ATokenText: AnsiString): IToken;
begin
if fTokenClass = nil then
fTokenClass := TToken;
result := fTokenClass.Create( ATokenType, ATokenText);
with result, fLexerState do
begin
TokenLine := Line;
TokenColumn := Column
end;
end;
end.