Files
bds.mr.dpg/doc/grammars/pascal/wirth/wpLex.pas
T
2026-01-03 18:31:15 +01:00

608 lines
16 KiB
ObjectPascal

unit wpLex;
interface
uses
Classes,
SysUtils,
Generics.Collections;
type
TTokenType =
(
TT_EOF,
TT_SKIP,
TT_COMMENT,
TT_LPAREN, // (
TT_RPAREN, // )
TT_LBRACKET, // [
TT_RBRACKET, // ]
TT_STAR, // *
TT_SLASH, // /
TT_PLUS, // +
TT_MINUS, // -
TT_LT, // <
TT_LE, // <=
TT_GT, // >
TT_GE, // >=
TT_EQ, // =
TT_NE, // <>
TT_COLON, // :
TT_ASSIGN, // :=
TT_DOT, // .
TT_RANGE, // ..
TT_PTR, // ^
TT_COMMA, // ,
TT_SEMI, // ;
TT_DOLLAR, // $
TT_AT, // @
TT_SHARP, // #
TT_ID,
TT_UINT,
TT_UREAL,
TT_CHAR,
TT_STRING,
TT_HEX,
TT_BIN,
LT_DO,
LT_IF,
LT_IN,
LT_OF,
LT_OR,
LT_TO,
LT_AND,
LT_DIV,
LT_END,
LT_FOR,
LT_MOD,
LT_NIL,
LT_NOT,
LT_SET,
LT_VAR,
LT_XOR,
LT_CASE,
LT_ELSE,
LT_FILE,
LT_GOTO,
LT_THEN,
LT_TYPE,
LT_USES,
LT_WITH,
LT_ARRAY,
LT_BEGIN,
LT_CONST,
LT_LABEL,
LT_UNTIL,
LT_WHILE,
LT_DOWNTO,
LT_PACKED,
LT_RECORD,
LT_REPEAT,
LT_PROGRAM,
LT_FUNCTION,
LT_PROCEDURE
);
TTokenTypes = set of TTokenType;
TBlah = set of byte;
TToken = class
TokenType : TTokenType;
TokenText : AnsiString;
TokenLine : integer;
TokenColumn : integer;
end;
TTokenMap = TDictionary<AnsiString,TTokenType>;
TwpLex = class
private
fBuffer : PAnsiChar;
fStart : PAnsiChar;
fForward : PAnsiChar;
fLiterals : TTokenMap;
fTokenLine : integer;
fTokenColumn: integer;
private
procedure InitLiterals;
function CheckLiteral( TokenText : AnsiString;
TokenType : TTokenType): TTokenType;
function MakeToken( TokenText : AnsiString;
TokenType : TTokenType): TToken;
public
function NextToken : TToken;
public
constructor Create( Stream: TStream; Length: Int64=-1);
destructor Destroy; override;
end;
EwpLex = Exception;
implementation
uses
Windows;
{ TwpLex }
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction/destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
// ================================================================================================
constructor TwpLex.Create(Stream: TStream; Length: Int64);
var
size : Int64;
token : TToken;
begin
inherited Create;
InitLiterals;
if Assigned(Stream) then
begin
if Length < 0
then size := Stream.Size - Stream.Position
else size := Length;
fBuffer := GetMemory(size+1);
Stream.Read( fBuffer^, size);
fStart := fBuffer;
fForward := fBuffer;
fBuffer[size] := #0;
end;
end;
// ================================================================================================
// Destructor
// ================================================================================================
destructor TwpLex.Destroy;
begin
FreeAndNil(fLiterals);
inherited;
end;
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Internals
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Make Token
// ================================================================================================
function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken;
begin
result := TToken.Create;
result.TokenLine := 0;
result.TokenColumn := 0;
result.TokenType := TokenType;
result.TokenText := TokenText;
// if TokenType = TT_COMMENT
// then result.TokenText := TokenText
// else result.TokenText := UpperCase(TokenText);
end;
// ================================================================================================
// Init Literals
// ================================================================================================
procedure TwpLex.InitLiterals;
begin
fLiterals := TTokenMap.Create;
fLiterals.Add('do', LT_DO);
fLiterals.Add('if', LT_IF);
fLiterals.Add('in', LT_IN);
fLiterals.Add('of', LT_OF);
fLiterals.Add('or', LT_OR);
fLiterals.Add('to', LT_TO);
fLiterals.Add('and', LT_AND);
fLiterals.Add('div', LT_DIV);
fLiterals.Add('end', LT_END);
fLiterals.Add('for', LT_FOR);
fLiterals.Add('mod', LT_MOD);
fLiterals.Add('nil', LT_NIL);
fLiterals.Add('not', LT_NOT);
fLiterals.Add('set', LT_SET);
fLiterals.Add('var', LT_VAR);
fLiterals.Add('xor', LT_XOR);
fLiterals.Add('case', LT_CASE);
fLiterals.Add('else', LT_ELSE);
fLiterals.Add('file', LT_FILE);
fLiterals.Add('goto', LT_GOTO);
fLiterals.Add('then', LT_THEN);
fLiterals.Add('type', LT_TYPE);
fLiterals.Add('uses', LT_USES);
fLiterals.Add('with', LT_WITH);
fLiterals.Add('array', LT_ARRAY);
fLiterals.Add('begin', LT_BEGIN);
fLiterals.Add('const', LT_CONST);
fLiterals.Add('label', LT_LABEL);
fLiterals.Add('until', LT_UNTIL);
fLiterals.Add('while', LT_WHILE);
fLiterals.Add('downto', LT_DOWNTO);
fLiterals.Add('packed', LT_PACKED);
fLiterals.Add('record', LT_RECORD);
fLiterals.Add('repeat', LT_REPEAT);
fLiterals.Add('program', LT_PROGRAM);
fLiterals.Add('function', LT_FUNCTION);
fLiterals.Add('procedure', LT_PROCEDURE);
end;
// ================================================================================================
// Check Literal
// ================================================================================================
function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType;
var
ttype : TTokenType;
begin
if fLiterals.TryGetValue(TokenText, ttype)
then result := ttype
else result := TokenType
end;
// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Interface
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Next Token
// ================================================================================================
function TwpLex.NextToken: TToken;
function GetTokenText: AnsiString;
begin
SetLength(result, fForward-fStart);
MoveMemory( @result[1], fStart, fForward-fStart);
end;
var
ttext : AnsiString;
ttype : TTokenType;
begin
result := nil;
while true do
begin
result := nil;
fForward := fStart;
case fForward^ of
// id
'a'..'z','A'..'Z','_':
begin
INC(fForward);
ttype := TT_ID;
while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do
INC(fForward);
ttype := CheckLiteral( GetTokenText, ttype);
end;
// uint or ureal
'0'..'9':
begin
INC(fForward);
ttype := TT_UINT;
while fForward^ in ['0'..'9'] do
INC(fForward);
// fractional part
if fForward^ = '.' then
begin
INC(fForward);
if fForward^ in ['0'..'9'] then
begin
INC(fForward);
ttype := TT_UREAL;
while fForward^ in ['0'..'9'] do
INC(fForward);
end
else if fForward^ = '.' then
ttype := TT_RANGE
else
raise EwpLex.Create('Expected 0..9 in fractional part');
end;
// exponential part
if ttype in [TT_UINT, TT_UREAL] then
begin
if fForward^ in ['e','E'] then
begin
INC(fForward);
ttype := TT_UREAL;
if fForward^ in ['+','-'] then
INC(fForward);
if fForward^ in ['0'..'9'] then
begin
INC(fForward);
while fForward^ in ['0'..'9'] do
INC(fForward);
end
else
raise EwpLex.Create('Expected +,-,0..9 in exponential part');
end;
end;
end;
// <,<=,<>
'<':
begin
INC(fForward);
ttype := TT_LT;
if fForward^ in ['=','>'] then
begin
case fForward^ of
'=': ttype := TT_LE;
'>': ttype := TT_NE;
end;
INC(fForward);
end;
end;
// >,>=
'>':
begin
INC(fForward);
ttype := TT_GT;
if fForward^ = '=' then
begin
INC(fForward);
ttype := TT_GE;
end;
end;
// :,:=
':':
begin
INC(fForward);
ttype := TT_COLON;
if fForward^ = '=' then
begin
INC(fForward);
ttype := TT_ASSIGN
end
end;
// .,..
'.':
begin
INC(fForward);
ttype := TT_DOT;
if fForward^ = '.' then
begin
INC(fForward);
ttype := TT_RANGE;
end
end;
// string
'''':
begin
INC(fForward);
ttype := TT_CHAR;
while true do
begin
if fForward^ in [#10,#13,#0] then
raise EwpLex.Create('Newline/EOF found in string');
if fForward^ = '''' then
begin
INC(fForward);
if fForward^ = ''''
then INC(fForward)
else break
end
else
INC(fForward)
end;
end;
// /,//
'/':
begin
INC(fForward);
ttype := TT_SLASH;
if fForward^ = '/' then
begin
INC(fForward);
ttype := TT_COMMENT;
while not (fForward^ in [#13,#10,#0]) do
INC(fForward);
end
end;
// comment
'{':
begin
INC(fForward);
ttype := TT_COMMENT;
while not (fForward^ in ['}',#0]) do
INC(fForward);
if fForward^ = #0
then raise EwpLex.Create('EOF reached in comment')
else INC( fForward);
end;
'(':
begin
INC(fForward);
ttype := TT_LPAREN;
if fForward^ = '*' then
begin
INC(fForward);
ttype := TT_COMMENT;
while true do
begin
if fForward^ = #0 then
raise EwpLex.Create('EOF reached in comment');
if fForward^ = '*' then
begin
INC(fForward);
if fForward^ = ')' then
begin
INC(fForward);
break;
end;
end
end
end
end;
// hex number
'$':
begin
INC(fForward);
if fForward^ in ['0'..'9','a'..'f','A'..'F'] then
begin
INC(fForward);
ttype := TT_HEX;
while fForward^ in ['0'..'9','a'..'f','A'..'F'] do
INC(fForward);
end
else
raise EwpLex.Create('Expected hexadecimal digit');
end;
'%':
begin
INC(fForward);
if fForward^ in ['0'..'1'] then
begin
INC(fForward);
ttype := TT_BIN;
while fForward^ in ['0'..'1'] do
INC(fForward);
end
else
raise EwpLex.Create('Expected binary digit');
end;
else
case fForward^ of
')': begin ttype := TT_RPAREN; INC(fForward) end;
'[': begin ttype := TT_LBRACKET; INC(fForward) end;
']': begin ttype := TT_RBRACKET; INC(fForward) end;
'*': begin ttype := TT_STAR; INC(fForward) end;
'+': begin ttype := TT_PLUS; INC(fForward) end;
'-': begin ttype := TT_MINUS; INC(fForward) end;
'=': begin ttype := TT_EQ; INC(fForward) end;
'^': begin ttype := TT_PTR; INC(fForward) end;
';': begin ttype := TT_SEMI; INC(fForward) end;
',': begin ttype := TT_COMMA; INC(fForward) end;
'$': begin ttype := TT_DOLLAR; INC(fForward) end;
'@': begin ttype := TT_AT; INC(fForward) end;
'#': begin ttype := TT_SHARP; INC(fForward) end;
#9 : begin ttype := TT_SKIP; INC(fForward) end;
#10: begin ttype := TT_SKIP; INC(fForward) end;
#13: begin ttype := TT_SKIP; INC(fForward) end;
#32: begin ttype := TT_SKIP; INC(fForward) end;
// EOF
#0 : ttype := TT_EOF;
else
raise EwpLex.Create('Invalid character '+fForward^);
end
end;
if ttype <> TT_SKIP then
begin
ttext := GetTokenText;
result := MakeToken( ttext, ttype);
fStart := fForward;
break;
end;
fStart := fForward;
end;
end;
end.