594 lines
20 KiB
ObjectPascal
594 lines
20 KiB
ObjectPascal
unit dpgrtl.lexer;
|
|
|
|
interface
|
|
uses
|
|
System.Classes,
|
|
System.SysUtils,
|
|
System.AnsiStrings,
|
|
dpgrtl.types,
|
|
dpgrtl.token,
|
|
dpgrtl.stringmap,
|
|
dpgrtl.lexerstate;
|
|
|
|
type
|
|
TLexer = class( TInterfacedObject, ITokenStream)
|
|
protected
|
|
TokenText : AnsiString;
|
|
|
|
fTokenClass : TTokenClass;
|
|
fLexerState : ILexerState;
|
|
fLiterals : TStringMap;
|
|
|
|
fCaseSensitive : boolean;
|
|
fSaveConsumedInput: boolean;
|
|
|
|
fReturnToken : IToken;
|
|
|
|
// ----------------------------------------------------------------------
|
|
// Property Handlers
|
|
// ----------------------------------------------------------------------
|
|
private
|
|
function GetTokenClass: TTokenClass;
|
|
procedure SetTokenClass( AClass: TTokenClass);
|
|
|
|
// ----------------------------------------------------------------------
|
|
// ITokenStream
|
|
// ----------------------------------------------------------------------
|
|
protected
|
|
function NextToken: IToken; virtual; abstract;
|
|
|
|
protected
|
|
// -------------------------------------------------------------------
|
|
procedure Append( AChar : AnsiChar); overload;
|
|
procedure Append( AString : AnsiString); overload;
|
|
|
|
// -------------------------------------------------------------------
|
|
procedure Consume;
|
|
procedure ConsumeUntil( AChar : AnsiChar); overload;
|
|
procedure ConsumeUntil( ASet : TCharSet); overload;
|
|
|
|
// -------------------------------------------------------------------
|
|
procedure MatchNot( AChar : AnsiChar);
|
|
|
|
procedure Match( AChar : AnsiChar); overload;
|
|
procedure Match( ASet : TCharSet); overload;
|
|
procedure Match( AString : AnsiString); overload;
|
|
procedure Match( AChar1 : AnsiChar;
|
|
AChar2 : AnsiChar); overload;
|
|
|
|
// -------------------------------------------------------------------
|
|
function TestLiteral( AType : integer): integer; overload;
|
|
function TestLiteral( AText : AnsiString;
|
|
AType : integer): integer; overload;
|
|
|
|
// -------------------------------------------------------------------
|
|
procedure Tab; virtual;
|
|
procedure UponEof; virtual;
|
|
procedure Initialize; virtual;
|
|
procedure ReportError( e: Exception); virtual;
|
|
|
|
// -------------------------------------------------------------------
|
|
function LA(i:integer): AnsiChar;
|
|
function Mark: integer;
|
|
procedure Rewind( Pos: integer);
|
|
procedure Commit;
|
|
procedure NewLine;
|
|
procedure ResetText;
|
|
|
|
// -------------------------------------------------------------------
|
|
function MakeToken( ATokenType : integer;
|
|
ATokenText : AnsiString=''): IToken;
|
|
|
|
public
|
|
constructor Create( ABuffer: ICharBuffer); overload;
|
|
constructor Create( AState : ILexerState); overload;
|
|
constructor Create( AStream: TStream); overload;
|
|
|
|
procedure AfterConstruction; override;
|
|
procedure BeforeDestruction; override;
|
|
|
|
public
|
|
property SaveConsumedInput : boolean read fSaveConsumedInput
|
|
write fSaveConsumedInput;
|
|
|
|
property CaseSensitive : boolean read fCaseSensitive
|
|
write fCaseSensitive;
|
|
|
|
property TokenClass : TTokenClass read GetTokenClass
|
|
write SetTokenClass;
|
|
|
|
property InputState : ILexerState read fLexerState
|
|
write fLexerState;
|
|
|
|
property ReturnToken : IToken read fReturnToken
|
|
write fReturnToken;
|
|
end;
|
|
|
|
TLexerClass = class of TLexer;
|
|
|
|
implementation
|
|
uses
|
|
dpgrtl.exception;
|
|
|
|
{ TLexer }
|
|
|
|
|
|
|
|
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Construction/destruction
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Constructor(ICharBuffer)
|
|
// ================================================================================================
|
|
constructor TLexer.Create( ABuffer: ICharBuffer);
|
|
begin
|
|
inherited Create;
|
|
fLexerState := TLexerState.Create( ABuffer);
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Constructor(ILexerState)
|
|
// ================================================================================================
|
|
constructor TLexer.Create( AState: ILexerState);
|
|
begin
|
|
inherited Create;
|
|
fLexerState := AState
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Constructor(TStream)
|
|
// ================================================================================================
|
|
constructor TLexer.Create( AStream: TStream);
|
|
begin
|
|
inherited Create;
|
|
fLexerState := TLexerState.Create( AStream)
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// After Construction
|
|
// ================================================================================================
|
|
procedure TLexer.AfterConstruction;
|
|
begin
|
|
inherited;
|
|
|
|
fLiterals := TStringMap.Create;
|
|
fLiterals.CaseSensitive := true;
|
|
fCaseSensitive := true;
|
|
fSaveConsumedInput := true;
|
|
|
|
Initialize;
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Before Destruction
|
|
// ================================================================================================
|
|
procedure TLexer.BeforeDestruction;
|
|
begin
|
|
fLexerState := nil;
|
|
fLiterals.Free;
|
|
|
|
inherited
|
|
end;
|
|
|
|
|
|
|
|
// @@@: Property Handlers +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Property Handlers
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Get Token Class
|
|
// ================================================================================================
|
|
function TLexer.GetTokenClass: TTokenClass;
|
|
begin
|
|
if fTokenClass = nil
|
|
then result := TToken
|
|
else result := fTokenClass
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Set Token Class
|
|
// ================================================================================================
|
|
procedure TLexer.SetTokenClass( AClass: TTokenClass);
|
|
begin
|
|
fTokenClass := AClass
|
|
end;
|
|
|
|
|
|
|
|
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Internals
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Append(Char)
|
|
// ================================================================================================
|
|
procedure TLexer.Append( AChar: AnsiChar);
|
|
begin
|
|
if fSaveConsumedInput then
|
|
TokenText := TokenText +AChar
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Append(String)
|
|
// ================================================================================================
|
|
procedure TLexer.Append( AString: AnsiString);
|
|
begin
|
|
if fSaveConsumedInput then
|
|
TokenText := TokenText +AString
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Consume
|
|
// ================================================================================================
|
|
procedure TLexer.Consume;
|
|
var
|
|
chr: AnsiChar;
|
|
|
|
begin
|
|
chr := LA(1);
|
|
|
|
if fLexerState.Guessing = 0 then
|
|
begin
|
|
Append(chr);
|
|
|
|
if chr = #9
|
|
then TAB
|
|
else fLexerState.Column := fLexerState.Column +1;
|
|
end;
|
|
|
|
fLexerState.InputBuffer.Consume
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// ComsumeUntil(Char)
|
|
// ================================================================================================
|
|
procedure TLexer.ConsumeUntil(AChar: AnsiChar);
|
|
var
|
|
chr: AnsiChar;
|
|
|
|
begin
|
|
chr := LA(1);
|
|
|
|
while (chr <> EOF_CHAR) and (chr <> AChar) do
|
|
begin
|
|
Consume;
|
|
chr := LA(1)
|
|
end
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// ConsumeUntil(Set)
|
|
// ================================================================================================
|
|
procedure TLexer.ConsumeUntil(ASet: TCharSet);
|
|
var
|
|
chr: AnsiChar;
|
|
|
|
begin
|
|
chr := LA(1);
|
|
|
|
while (chr <> EOF_CHAR) and not(chr in ASet) do
|
|
begin
|
|
Consume;
|
|
chr := LA(1)
|
|
end
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Match Not(Char)
|
|
// ================================================================================================
|
|
procedure TLexer.MatchNot(AChar: AnsiChar);
|
|
var
|
|
chr : AnsiChar;
|
|
la1 : AnsiChar;
|
|
|
|
begin
|
|
if not fCaseSensitive then
|
|
begin
|
|
chr := UpCase( AChar);
|
|
la1 := UpCase( LA(1));
|
|
end
|
|
|
|
else begin
|
|
chr := AChar;
|
|
la1 := LA(1);
|
|
end;
|
|
|
|
if la1 = chr then
|
|
Raise EMismatchedChar.Create( LA(1),
|
|
[AChar],
|
|
fLexerState.FileName,
|
|
fLexerState.Line,
|
|
fLexerState.Column,
|
|
true)
|
|
else
|
|
Consume
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Match(Char)
|
|
// ================================================================================================
|
|
procedure TLexer.Match(AChar: AnsiChar);
|
|
var
|
|
chr : AnsiChar;
|
|
la1 : AnsiChar;
|
|
|
|
begin
|
|
if not fCaseSensitive then
|
|
begin
|
|
chr := UpCase( AChar);
|
|
la1 := UpCase( LA(1));
|
|
end
|
|
|
|
else begin
|
|
chr := AChar;
|
|
la1 := LA(1);
|
|
end;
|
|
|
|
if la1 <> chr then
|
|
Raise EMismatchedChar.Create( LA(1),
|
|
[AChar],
|
|
fLexerState.FileName,
|
|
fLexerState.Line,
|
|
fLexerState.Column,
|
|
false)
|
|
else
|
|
Consume
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Match(CharSet)
|
|
// ================================================================================================
|
|
procedure TLexer.Match(ASet: TCharSet);
|
|
var
|
|
la1: AnsiChar;
|
|
|
|
begin
|
|
la1 := LA(1);
|
|
|
|
if not (la1 in ASet) then
|
|
Raise EMismatchedChar.Create( la1,
|
|
ASet,
|
|
fLexerState.FileName,
|
|
fLexerState.Line,
|
|
fLexerState.Column,
|
|
false)
|
|
else
|
|
Consume
|
|
end;
|
|
|
|
|
|
// ================================================================================================
|
|
// Match(Char..Char)
|
|
// ================================================================================================
|
|
procedure TLexer.Match(AChar1, AChar2: AnsiChar);
|
|
var
|
|
chr1: AnsiChar;
|
|
chr2: AnsiChar;
|
|
la1 : AnsiChar;
|
|
|
|
begin
|
|
if not fCaseSensitive then
|
|
begin
|
|
chr1 := UpCase( AChar1);
|
|
chr2 := UpCase( AChar2);
|
|
la1 := UpCase( LA(1));
|
|
end
|
|
|
|
else begin
|
|
chr1 := AChar1;
|
|
chr2 := AChar2;
|
|
la1 := LA(1);
|
|
end;
|
|
|
|
if not (la1 in [chr1..chr2]) then
|
|
Raise EMismatchedChar.Create( la1,
|
|
[chr1..chr2],
|
|
fLexerState.FileName,
|
|
fLexerState.Line,
|
|
fLexerState.Column,
|
|
false)
|
|
else
|
|
Consume
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Match(string)
|
|
// ================================================================================================
|
|
procedure TLexer.Match(AString: AnsiString);
|
|
var
|
|
la1 : AnsiChar;
|
|
s : AnsiString;
|
|
str : AnsiString;
|
|
len : integer;
|
|
i : integer;
|
|
|
|
begin
|
|
if not fCaseSensitive
|
|
then str := AnsiUpperCase( AString)
|
|
else str := AString;
|
|
|
|
len := Length(str);
|
|
s := '';
|
|
|
|
for i:=1 to len do
|
|
begin
|
|
if not fCaseSensitive
|
|
then la1 := UpCase(LA(1))
|
|
else la1 := LA(1);
|
|
|
|
s := s + la1;
|
|
|
|
if la1 <> str[i] then
|
|
Raise EMismatchedChar.Create( s,
|
|
str,
|
|
fLexerState.FileName,
|
|
fLexerState.Line,
|
|
fLexerState.Column)
|
|
else
|
|
Consume
|
|
end;
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Test Literal
|
|
// ================================================================================================
|
|
function TLexer.TestLiteral(AType: integer): integer;
|
|
begin
|
|
result := fLiterals.Value[TokenText];
|
|
|
|
if result < 0 then
|
|
result := AType
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Test Literal
|
|
// ================================================================================================
|
|
function TLexer.TestLiteral(AText: AnsiString; AType: integer): integer;
|
|
begin
|
|
result := fLiterals.Value[AText];
|
|
|
|
if result < 0 then
|
|
result := AType
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Tab
|
|
//
|
|
// Advance the current column number by an appropriate amount. If you do not override this
|
|
// to specify how much to jump for a tab, then tabs are counted as one char. This method is
|
|
// called from consume().
|
|
//
|
|
// update inputState->column as function of inputState->column and tab stops. For example,
|
|
// if tab stops are columns 1 and 5 etc... and column is 3, then add 2 to column.
|
|
// ================================================================================================
|
|
procedure TLexer.Tab;
|
|
begin
|
|
fLexerState.Column := fLexerState.Column +1;
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// UponEof
|
|
//
|
|
// This method is called by YourLexer::nextToken() when the lexer has hit EOF condition.
|
|
// EOF is NOT a character. This method is not called if EOF is reached during syntactic
|
|
// predicate evaluation or during evaluation of normal lexical rules, which presumably
|
|
// would be an IOException. This traps the "normal" EOF condition.
|
|
//
|
|
// UponEOF() is called after the complete evaluation of the previous token and only if your
|
|
// parser asks for another token beyond that last non-EOF token.
|
|
//
|
|
// You might want to throw token or char stream exceptions like: "Heh, premature eof" or a
|
|
// retry stream exception ("I found the end of this file, go back to referencing file").
|
|
// ================================================================================================
|
|
procedure TLexer.UponEof;
|
|
begin
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Initialize
|
|
// ================================================================================================
|
|
procedure TLexer.Initialize;
|
|
begin
|
|
end;
|
|
// ================================================================================================
|
|
// ReportError
|
|
// ================================================================================================
|
|
procedure TLexer.ReportError(e: Exception);
|
|
begin
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// LA
|
|
// ================================================================================================
|
|
function TLexer.LA(i: integer): AnsiChar;
|
|
begin
|
|
result := fLexerState.InputBuffer.LA(i)
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Commit
|
|
// ================================================================================================
|
|
procedure TLexer.Commit;
|
|
begin
|
|
fLexerState.InputBuffer.Commit
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Mark
|
|
// ================================================================================================
|
|
function TLexer.Mark: integer;
|
|
begin
|
|
result := fLexerState.InputBuffer.Mark
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Pos
|
|
// ================================================================================================
|
|
procedure TLexer.Rewind(Pos: integer);
|
|
begin
|
|
fLexerState.InputBuffer.Rewind(Pos)
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// ResetText
|
|
// ================================================================================================
|
|
procedure TLexer.ResetText;
|
|
begin
|
|
TokenText := '';
|
|
|
|
with fLexerState do
|
|
begin
|
|
TokenStartLine := Line;
|
|
TokenStartColumn := Column;
|
|
end
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// NewLine
|
|
// ================================================================================================
|
|
procedure TLexer.NewLine;
|
|
begin
|
|
with fLexerState do
|
|
begin
|
|
Line := Line +1;
|
|
Column := 1;
|
|
end
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// MakeToken
|
|
// ================================================================================================
|
|
function TLexer.MakeToken(ATokenType: integer; ATokenText: AnsiString): IToken;
|
|
begin
|
|
if fTokenClass = nil then
|
|
fTokenClass := TToken;
|
|
|
|
result := fTokenClass.Create( ATokenType, ATokenText);
|
|
|
|
with result, fLexerState do
|
|
begin
|
|
TokenLine := Line;
|
|
TokenColumn := Column
|
|
end;
|
|
end;
|
|
|
|
|
|
end.
|