608 lines
16 KiB
ObjectPascal
608 lines
16 KiB
ObjectPascal
unit wpLex;
|
|
|
|
interface
|
|
uses
|
|
Classes,
|
|
SysUtils,
|
|
Generics.Collections;
|
|
|
|
|
|
type
|
|
TTokenType =
|
|
(
|
|
TT_EOF,
|
|
TT_SKIP,
|
|
TT_COMMENT,
|
|
|
|
TT_LPAREN, // (
|
|
TT_RPAREN, // )
|
|
TT_LBRACKET, // [
|
|
TT_RBRACKET, // ]
|
|
|
|
TT_STAR, // *
|
|
TT_SLASH, // /
|
|
TT_PLUS, // +
|
|
TT_MINUS, // -
|
|
|
|
TT_LT, // <
|
|
TT_LE, // <=
|
|
TT_GT, // >
|
|
TT_GE, // >=
|
|
|
|
TT_EQ, // =
|
|
TT_NE, // <>
|
|
|
|
TT_COLON, // :
|
|
TT_ASSIGN, // :=
|
|
|
|
TT_DOT, // .
|
|
TT_RANGE, // ..
|
|
TT_PTR, // ^
|
|
TT_COMMA, // ,
|
|
TT_SEMI, // ;
|
|
TT_DOLLAR, // $
|
|
TT_AT, // @
|
|
TT_SHARP, // #
|
|
|
|
TT_ID,
|
|
TT_UINT,
|
|
TT_UREAL,
|
|
TT_CHAR,
|
|
TT_STRING,
|
|
TT_HEX,
|
|
TT_BIN,
|
|
|
|
LT_DO,
|
|
LT_IF,
|
|
LT_IN,
|
|
LT_OF,
|
|
LT_OR,
|
|
LT_TO,
|
|
|
|
LT_AND,
|
|
LT_DIV,
|
|
LT_END,
|
|
LT_FOR,
|
|
LT_MOD,
|
|
LT_NIL,
|
|
LT_NOT,
|
|
LT_SET,
|
|
LT_VAR,
|
|
LT_XOR,
|
|
|
|
LT_CASE,
|
|
LT_ELSE,
|
|
LT_FILE,
|
|
LT_GOTO,
|
|
LT_THEN,
|
|
LT_TYPE,
|
|
LT_USES,
|
|
LT_WITH,
|
|
|
|
LT_ARRAY,
|
|
LT_BEGIN,
|
|
LT_CONST,
|
|
LT_LABEL,
|
|
LT_UNTIL,
|
|
LT_WHILE,
|
|
|
|
LT_DOWNTO,
|
|
LT_PACKED,
|
|
LT_RECORD,
|
|
LT_REPEAT,
|
|
|
|
LT_PROGRAM,
|
|
LT_FUNCTION,
|
|
LT_PROCEDURE
|
|
);
|
|
|
|
TTokenTypes = set of TTokenType;
|
|
TBlah = set of byte;
|
|
|
|
TToken = class
|
|
TokenType : TTokenType;
|
|
TokenText : AnsiString;
|
|
TokenLine : integer;
|
|
TokenColumn : integer;
|
|
end;
|
|
|
|
TTokenMap = TDictionary<AnsiString,TTokenType>;
|
|
|
|
|
|
TwpLex = class
|
|
private
|
|
fBuffer : PAnsiChar;
|
|
fStart : PAnsiChar;
|
|
fForward : PAnsiChar;
|
|
|
|
fLiterals : TTokenMap;
|
|
|
|
fTokenLine : integer;
|
|
fTokenColumn: integer;
|
|
|
|
private
|
|
procedure InitLiterals;
|
|
|
|
function CheckLiteral( TokenText : AnsiString;
|
|
TokenType : TTokenType): TTokenType;
|
|
|
|
function MakeToken( TokenText : AnsiString;
|
|
TokenType : TTokenType): TToken;
|
|
|
|
public
|
|
function NextToken : TToken;
|
|
|
|
public
|
|
constructor Create( Stream: TStream; Length: Int64=-1);
|
|
destructor Destroy; override;
|
|
end;
|
|
|
|
EwpLex = Exception;
|
|
|
|
|
|
implementation
|
|
uses
|
|
Windows;
|
|
|
|
|
|
{ TwpLex }
|
|
|
|
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Construction/destruction
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Constructor
|
|
// ================================================================================================
|
|
constructor TwpLex.Create(Stream: TStream; Length: Int64);
|
|
var
|
|
size : Int64;
|
|
token : TToken;
|
|
|
|
begin
|
|
inherited Create;
|
|
|
|
InitLiterals;
|
|
|
|
if Assigned(Stream) then
|
|
begin
|
|
if Length < 0
|
|
then size := Stream.Size - Stream.Position
|
|
else size := Length;
|
|
|
|
fBuffer := GetMemory(size+1);
|
|
|
|
Stream.Read( fBuffer^, size);
|
|
|
|
fStart := fBuffer;
|
|
fForward := fBuffer;
|
|
fBuffer[size] := #0;
|
|
end;
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Destructor
|
|
// ================================================================================================
|
|
destructor TwpLex.Destroy;
|
|
begin
|
|
FreeAndNil(fLiterals);
|
|
inherited;
|
|
end;
|
|
|
|
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Internals
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Make Token
|
|
// ================================================================================================
|
|
function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken;
|
|
begin
|
|
result := TToken.Create;
|
|
result.TokenLine := 0;
|
|
result.TokenColumn := 0;
|
|
result.TokenType := TokenType;
|
|
result.TokenText := TokenText;
|
|
|
|
// if TokenType = TT_COMMENT
|
|
// then result.TokenText := TokenText
|
|
// else result.TokenText := UpperCase(TokenText);
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Init Literals
|
|
// ================================================================================================
|
|
procedure TwpLex.InitLiterals;
|
|
begin
|
|
fLiterals := TTokenMap.Create;
|
|
|
|
fLiterals.Add('do', LT_DO);
|
|
fLiterals.Add('if', LT_IF);
|
|
fLiterals.Add('in', LT_IN);
|
|
fLiterals.Add('of', LT_OF);
|
|
fLiterals.Add('or', LT_OR);
|
|
fLiterals.Add('to', LT_TO);
|
|
|
|
fLiterals.Add('and', LT_AND);
|
|
fLiterals.Add('div', LT_DIV);
|
|
fLiterals.Add('end', LT_END);
|
|
fLiterals.Add('for', LT_FOR);
|
|
fLiterals.Add('mod', LT_MOD);
|
|
fLiterals.Add('nil', LT_NIL);
|
|
fLiterals.Add('not', LT_NOT);
|
|
fLiterals.Add('set', LT_SET);
|
|
fLiterals.Add('var', LT_VAR);
|
|
fLiterals.Add('xor', LT_XOR);
|
|
|
|
fLiterals.Add('case', LT_CASE);
|
|
fLiterals.Add('else', LT_ELSE);
|
|
fLiterals.Add('file', LT_FILE);
|
|
fLiterals.Add('goto', LT_GOTO);
|
|
fLiterals.Add('then', LT_THEN);
|
|
fLiterals.Add('type', LT_TYPE);
|
|
fLiterals.Add('uses', LT_USES);
|
|
fLiterals.Add('with', LT_WITH);
|
|
|
|
fLiterals.Add('array', LT_ARRAY);
|
|
fLiterals.Add('begin', LT_BEGIN);
|
|
fLiterals.Add('const', LT_CONST);
|
|
fLiterals.Add('label', LT_LABEL);
|
|
fLiterals.Add('until', LT_UNTIL);
|
|
fLiterals.Add('while', LT_WHILE);
|
|
|
|
fLiterals.Add('downto', LT_DOWNTO);
|
|
fLiterals.Add('packed', LT_PACKED);
|
|
fLiterals.Add('record', LT_RECORD);
|
|
fLiterals.Add('repeat', LT_REPEAT);
|
|
|
|
fLiterals.Add('program', LT_PROGRAM);
|
|
fLiterals.Add('function', LT_FUNCTION);
|
|
fLiterals.Add('procedure', LT_PROCEDURE);
|
|
end;
|
|
|
|
// ================================================================================================
|
|
// Check Literal
|
|
// ================================================================================================
|
|
function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType;
|
|
var
|
|
ttype : TTokenType;
|
|
|
|
begin
|
|
if fLiterals.TryGetValue(TokenText, ttype)
|
|
then result := ttype
|
|
else result := TokenType
|
|
end;
|
|
|
|
// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
//
|
|
// Interface
|
|
//
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
// ================================================================================================
|
|
// Next Token
|
|
// ================================================================================================
|
|
function TwpLex.NextToken: TToken;
|
|
|
|
function GetTokenText: AnsiString;
|
|
begin
|
|
SetLength(result, fForward-fStart);
|
|
MoveMemory( @result[1], fStart, fForward-fStart);
|
|
end;
|
|
|
|
var
|
|
ttext : AnsiString;
|
|
ttype : TTokenType;
|
|
|
|
begin
|
|
result := nil;
|
|
|
|
while true do
|
|
begin
|
|
result := nil;
|
|
fForward := fStart;
|
|
|
|
case fForward^ of
|
|
|
|
// id
|
|
'a'..'z','A'..'Z','_':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_ID;
|
|
|
|
while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do
|
|
INC(fForward);
|
|
|
|
ttype := CheckLiteral( GetTokenText, ttype);
|
|
end;
|
|
|
|
// uint or ureal
|
|
'0'..'9':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_UINT;
|
|
|
|
while fForward^ in ['0'..'9'] do
|
|
INC(fForward);
|
|
|
|
// fractional part
|
|
if fForward^ = '.' then
|
|
begin
|
|
INC(fForward);
|
|
|
|
if fForward^ in ['0'..'9'] then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_UREAL;
|
|
|
|
while fForward^ in ['0'..'9'] do
|
|
INC(fForward);
|
|
end
|
|
|
|
else if fForward^ = '.' then
|
|
ttype := TT_RANGE
|
|
|
|
else
|
|
raise EwpLex.Create('Expected 0..9 in fractional part');
|
|
end;
|
|
|
|
// exponential part
|
|
if ttype in [TT_UINT, TT_UREAL] then
|
|
begin
|
|
if fForward^ in ['e','E'] then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_UREAL;
|
|
|
|
if fForward^ in ['+','-'] then
|
|
INC(fForward);
|
|
|
|
if fForward^ in ['0'..'9'] then
|
|
begin
|
|
INC(fForward);
|
|
|
|
while fForward^ in ['0'..'9'] do
|
|
INC(fForward);
|
|
end
|
|
else
|
|
raise EwpLex.Create('Expected +,-,0..9 in exponential part');
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
// <,<=,<>
|
|
'<':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_LT;
|
|
|
|
if fForward^ in ['=','>'] then
|
|
begin
|
|
case fForward^ of
|
|
'=': ttype := TT_LE;
|
|
'>': ttype := TT_NE;
|
|
end;
|
|
|
|
INC(fForward);
|
|
end;
|
|
end;
|
|
|
|
// >,>=
|
|
'>':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_GT;
|
|
|
|
if fForward^ = '=' then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_GE;
|
|
end;
|
|
end;
|
|
|
|
// :,:=
|
|
':':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_COLON;
|
|
|
|
if fForward^ = '=' then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_ASSIGN
|
|
end
|
|
end;
|
|
|
|
// .,..
|
|
'.':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_DOT;
|
|
|
|
if fForward^ = '.' then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_RANGE;
|
|
end
|
|
end;
|
|
|
|
// string
|
|
'''':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_CHAR;
|
|
|
|
while true do
|
|
begin
|
|
if fForward^ in [#10,#13,#0] then
|
|
raise EwpLex.Create('Newline/EOF found in string');
|
|
|
|
if fForward^ = '''' then
|
|
begin
|
|
INC(fForward);
|
|
|
|
if fForward^ = ''''
|
|
then INC(fForward)
|
|
else break
|
|
|
|
end
|
|
else
|
|
INC(fForward)
|
|
end;
|
|
end;
|
|
|
|
// /,//
|
|
'/':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_SLASH;
|
|
|
|
if fForward^ = '/' then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_COMMENT;
|
|
|
|
while not (fForward^ in [#13,#10,#0]) do
|
|
INC(fForward);
|
|
end
|
|
end;
|
|
|
|
// comment
|
|
'{':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_COMMENT;
|
|
|
|
while not (fForward^ in ['}',#0]) do
|
|
INC(fForward);
|
|
|
|
if fForward^ = #0
|
|
then raise EwpLex.Create('EOF reached in comment')
|
|
else INC( fForward);
|
|
end;
|
|
|
|
'(':
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_LPAREN;
|
|
|
|
if fForward^ = '*' then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_COMMENT;
|
|
|
|
while true do
|
|
begin
|
|
if fForward^ = #0 then
|
|
raise EwpLex.Create('EOF reached in comment');
|
|
|
|
if fForward^ = '*' then
|
|
begin
|
|
INC(fForward);
|
|
|
|
if fForward^ = ')' then
|
|
begin
|
|
INC(fForward);
|
|
break;
|
|
end;
|
|
end
|
|
end
|
|
end
|
|
end;
|
|
|
|
// hex number
|
|
'$':
|
|
begin
|
|
INC(fForward);
|
|
|
|
if fForward^ in ['0'..'9','a'..'f','A'..'F'] then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_HEX;
|
|
|
|
while fForward^ in ['0'..'9','a'..'f','A'..'F'] do
|
|
INC(fForward);
|
|
end
|
|
|
|
else
|
|
raise EwpLex.Create('Expected hexadecimal digit');
|
|
end;
|
|
|
|
'%':
|
|
begin
|
|
INC(fForward);
|
|
|
|
if fForward^ in ['0'..'1'] then
|
|
begin
|
|
INC(fForward);
|
|
ttype := TT_BIN;
|
|
|
|
while fForward^ in ['0'..'1'] do
|
|
INC(fForward);
|
|
end
|
|
|
|
else
|
|
raise EwpLex.Create('Expected binary digit');
|
|
end;
|
|
|
|
else
|
|
case fForward^ of
|
|
')': begin ttype := TT_RPAREN; INC(fForward) end;
|
|
'[': begin ttype := TT_LBRACKET; INC(fForward) end;
|
|
']': begin ttype := TT_RBRACKET; INC(fForward) end;
|
|
|
|
'*': begin ttype := TT_STAR; INC(fForward) end;
|
|
'+': begin ttype := TT_PLUS; INC(fForward) end;
|
|
'-': begin ttype := TT_MINUS; INC(fForward) end;
|
|
|
|
'=': begin ttype := TT_EQ; INC(fForward) end;
|
|
|
|
'^': begin ttype := TT_PTR; INC(fForward) end;
|
|
';': begin ttype := TT_SEMI; INC(fForward) end;
|
|
',': begin ttype := TT_COMMA; INC(fForward) end;
|
|
'$': begin ttype := TT_DOLLAR; INC(fForward) end;
|
|
|
|
'@': begin ttype := TT_AT; INC(fForward) end;
|
|
'#': begin ttype := TT_SHARP; INC(fForward) end;
|
|
|
|
#9 : begin ttype := TT_SKIP; INC(fForward) end;
|
|
#10: begin ttype := TT_SKIP; INC(fForward) end;
|
|
#13: begin ttype := TT_SKIP; INC(fForward) end;
|
|
#32: begin ttype := TT_SKIP; INC(fForward) end;
|
|
|
|
// EOF
|
|
#0 : ttype := TT_EOF;
|
|
|
|
else
|
|
raise EwpLex.Create('Invalid character '+fForward^);
|
|
end
|
|
end;
|
|
|
|
if ttype <> TT_SKIP then
|
|
begin
|
|
ttext := GetTokenText;
|
|
result := MakeToken( ttext, ttype);
|
|
fStart := fForward;
|
|
|
|
break;
|
|
end;
|
|
|
|
fStart := fForward;
|
|
end;
|
|
end;
|
|
|
|
end.
|