Initial check in docu
This commit is contained in:
@@ -0,0 +1,607 @@
|
||||
unit wpLex;
|
||||
|
||||
interface
|
||||
uses
|
||||
Classes,
|
||||
SysUtils,
|
||||
Generics.Collections;
|
||||
|
||||
|
||||
type
|
||||
TTokenType =
|
||||
(
|
||||
TT_EOF,
|
||||
TT_SKIP,
|
||||
TT_COMMENT,
|
||||
|
||||
TT_LPAREN, // (
|
||||
TT_RPAREN, // )
|
||||
TT_LBRACKET, // [
|
||||
TT_RBRACKET, // ]
|
||||
|
||||
TT_STAR, // *
|
||||
TT_SLASH, // /
|
||||
TT_PLUS, // +
|
||||
TT_MINUS, // -
|
||||
|
||||
TT_LT, // <
|
||||
TT_LE, // <=
|
||||
TT_GT, // >
|
||||
TT_GE, // >=
|
||||
|
||||
TT_EQ, // =
|
||||
TT_NE, // <>
|
||||
|
||||
TT_COLON, // :
|
||||
TT_ASSIGN, // :=
|
||||
|
||||
TT_DOT, // .
|
||||
TT_RANGE, // ..
|
||||
TT_PTR, // ^
|
||||
TT_COMMA, // ,
|
||||
TT_SEMI, // ;
|
||||
TT_DOLLAR, // $
|
||||
TT_AT, // @
|
||||
TT_SHARP, // #
|
||||
|
||||
TT_ID,
|
||||
TT_UINT,
|
||||
TT_UREAL,
|
||||
TT_CHAR,
|
||||
TT_STRING,
|
||||
TT_HEX,
|
||||
TT_BIN,
|
||||
|
||||
LT_DO,
|
||||
LT_IF,
|
||||
LT_IN,
|
||||
LT_OF,
|
||||
LT_OR,
|
||||
LT_TO,
|
||||
|
||||
LT_AND,
|
||||
LT_DIV,
|
||||
LT_END,
|
||||
LT_FOR,
|
||||
LT_MOD,
|
||||
LT_NIL,
|
||||
LT_NOT,
|
||||
LT_SET,
|
||||
LT_VAR,
|
||||
LT_XOR,
|
||||
|
||||
LT_CASE,
|
||||
LT_ELSE,
|
||||
LT_FILE,
|
||||
LT_GOTO,
|
||||
LT_THEN,
|
||||
LT_TYPE,
|
||||
LT_USES,
|
||||
LT_WITH,
|
||||
|
||||
LT_ARRAY,
|
||||
LT_BEGIN,
|
||||
LT_CONST,
|
||||
LT_LABEL,
|
||||
LT_UNTIL,
|
||||
LT_WHILE,
|
||||
|
||||
LT_DOWNTO,
|
||||
LT_PACKED,
|
||||
LT_RECORD,
|
||||
LT_REPEAT,
|
||||
|
||||
LT_PROGRAM,
|
||||
LT_FUNCTION,
|
||||
LT_PROCEDURE
|
||||
);
|
||||
|
||||
TTokenTypes = set of TTokenType;
|
||||
TBlah = set of byte;
|
||||
|
||||
TToken = class
|
||||
TokenType : TTokenType;
|
||||
TokenText : AnsiString;
|
||||
TokenLine : integer;
|
||||
TokenColumn : integer;
|
||||
end;
|
||||
|
||||
TTokenMap = TDictionary<AnsiString,TTokenType>;
|
||||
|
||||
|
||||
TwpLex = class
|
||||
private
|
||||
fBuffer : PAnsiChar;
|
||||
fStart : PAnsiChar;
|
||||
fForward : PAnsiChar;
|
||||
|
||||
fLiterals : TTokenMap;
|
||||
|
||||
fTokenLine : integer;
|
||||
fTokenColumn: integer;
|
||||
|
||||
private
|
||||
procedure InitLiterals;
|
||||
|
||||
function CheckLiteral( TokenText : AnsiString;
|
||||
TokenType : TTokenType): TTokenType;
|
||||
|
||||
function MakeToken( TokenText : AnsiString;
|
||||
TokenType : TTokenType): TToken;
|
||||
|
||||
public
|
||||
function NextToken : TToken;
|
||||
|
||||
public
|
||||
constructor Create( Stream: TStream; Length: Int64=-1);
|
||||
destructor Destroy; override;
|
||||
end;
|
||||
|
||||
EwpLex = Exception;
|
||||
|
||||
|
||||
implementation
|
||||
uses
|
||||
Windows;
|
||||
|
||||
|
||||
{ TwpLex }
|
||||
|
||||
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
//
|
||||
// Construction/destruction
|
||||
//
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ================================================================================================
|
||||
// Constructor
|
||||
// ================================================================================================
|
||||
constructor TwpLex.Create(Stream: TStream; Length: Int64);
|
||||
var
|
||||
size : Int64;
|
||||
token : TToken;
|
||||
|
||||
begin
|
||||
inherited Create;
|
||||
|
||||
InitLiterals;
|
||||
|
||||
if Assigned(Stream) then
|
||||
begin
|
||||
if Length < 0
|
||||
then size := Stream.Size - Stream.Position
|
||||
else size := Length;
|
||||
|
||||
fBuffer := GetMemory(size+1);
|
||||
|
||||
Stream.Read( fBuffer^, size);
|
||||
|
||||
fStart := fBuffer;
|
||||
fForward := fBuffer;
|
||||
fBuffer[size] := #0;
|
||||
end;
|
||||
end;
|
||||
|
||||
// ================================================================================================
|
||||
// Destructor
|
||||
// ================================================================================================
|
||||
destructor TwpLex.Destroy;
|
||||
begin
|
||||
FreeAndNil(fLiterals);
|
||||
inherited;
|
||||
end;
|
||||
|
||||
// @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
//
|
||||
// Internals
|
||||
//
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ================================================================================================
|
||||
// Make Token
|
||||
// ================================================================================================
|
||||
function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken;
|
||||
begin
|
||||
result := TToken.Create;
|
||||
result.TokenLine := 0;
|
||||
result.TokenColumn := 0;
|
||||
result.TokenType := TokenType;
|
||||
result.TokenText := TokenText;
|
||||
|
||||
// if TokenType = TT_COMMENT
|
||||
// then result.TokenText := TokenText
|
||||
// else result.TokenText := UpperCase(TokenText);
|
||||
end;
|
||||
|
||||
// ================================================================================================
|
||||
// Init Literals
|
||||
// ================================================================================================
|
||||
procedure TwpLex.InitLiterals;
|
||||
begin
|
||||
fLiterals := TTokenMap.Create;
|
||||
|
||||
fLiterals.Add('do', LT_DO);
|
||||
fLiterals.Add('if', LT_IF);
|
||||
fLiterals.Add('in', LT_IN);
|
||||
fLiterals.Add('of', LT_OF);
|
||||
fLiterals.Add('or', LT_OR);
|
||||
fLiterals.Add('to', LT_TO);
|
||||
|
||||
fLiterals.Add('and', LT_AND);
|
||||
fLiterals.Add('div', LT_DIV);
|
||||
fLiterals.Add('end', LT_END);
|
||||
fLiterals.Add('for', LT_FOR);
|
||||
fLiterals.Add('mod', LT_MOD);
|
||||
fLiterals.Add('nil', LT_NIL);
|
||||
fLiterals.Add('not', LT_NOT);
|
||||
fLiterals.Add('set', LT_SET);
|
||||
fLiterals.Add('var', LT_VAR);
|
||||
fLiterals.Add('xor', LT_XOR);
|
||||
|
||||
fLiterals.Add('case', LT_CASE);
|
||||
fLiterals.Add('else', LT_ELSE);
|
||||
fLiterals.Add('file', LT_FILE);
|
||||
fLiterals.Add('goto', LT_GOTO);
|
||||
fLiterals.Add('then', LT_THEN);
|
||||
fLiterals.Add('type', LT_TYPE);
|
||||
fLiterals.Add('uses', LT_USES);
|
||||
fLiterals.Add('with', LT_WITH);
|
||||
|
||||
fLiterals.Add('array', LT_ARRAY);
|
||||
fLiterals.Add('begin', LT_BEGIN);
|
||||
fLiterals.Add('const', LT_CONST);
|
||||
fLiterals.Add('label', LT_LABEL);
|
||||
fLiterals.Add('until', LT_UNTIL);
|
||||
fLiterals.Add('while', LT_WHILE);
|
||||
|
||||
fLiterals.Add('downto', LT_DOWNTO);
|
||||
fLiterals.Add('packed', LT_PACKED);
|
||||
fLiterals.Add('record', LT_RECORD);
|
||||
fLiterals.Add('repeat', LT_REPEAT);
|
||||
|
||||
fLiterals.Add('program', LT_PROGRAM);
|
||||
fLiterals.Add('function', LT_FUNCTION);
|
||||
fLiterals.Add('procedure', LT_PROCEDURE);
|
||||
end;
|
||||
|
||||
// ================================================================================================
|
||||
// Check Literal
|
||||
// ================================================================================================
|
||||
function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType;
|
||||
var
|
||||
ttype : TTokenType;
|
||||
|
||||
begin
|
||||
if fLiterals.TryGetValue(TokenText, ttype)
|
||||
then result := ttype
|
||||
else result := TokenType
|
||||
end;
|
||||
|
||||
// @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
//
|
||||
// Interface
|
||||
//
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
// ================================================================================================
|
||||
// Next Token
|
||||
// ================================================================================================
|
||||
function TwpLex.NextToken: TToken;
|
||||
|
||||
function GetTokenText: AnsiString;
|
||||
begin
|
||||
SetLength(result, fForward-fStart);
|
||||
MoveMemory( @result[1], fStart, fForward-fStart);
|
||||
end;
|
||||
|
||||
var
|
||||
ttext : AnsiString;
|
||||
ttype : TTokenType;
|
||||
|
||||
begin
|
||||
result := nil;
|
||||
|
||||
while true do
|
||||
begin
|
||||
result := nil;
|
||||
fForward := fStart;
|
||||
|
||||
case fForward^ of
|
||||
|
||||
// id
|
||||
'a'..'z','A'..'Z','_':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_ID;
|
||||
|
||||
while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do
|
||||
INC(fForward);
|
||||
|
||||
ttype := CheckLiteral( GetTokenText, ttype);
|
||||
end;
|
||||
|
||||
// uint or ureal
|
||||
'0'..'9':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_UINT;
|
||||
|
||||
while fForward^ in ['0'..'9'] do
|
||||
INC(fForward);
|
||||
|
||||
// fractional part
|
||||
if fForward^ = '.' then
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ in ['0'..'9'] then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_UREAL;
|
||||
|
||||
while fForward^ in ['0'..'9'] do
|
||||
INC(fForward);
|
||||
end
|
||||
|
||||
else if fForward^ = '.' then
|
||||
ttype := TT_RANGE
|
||||
|
||||
else
|
||||
raise EwpLex.Create('Expected 0..9 in fractional part');
|
||||
end;
|
||||
|
||||
// exponential part
|
||||
if ttype in [TT_UINT, TT_UREAL] then
|
||||
begin
|
||||
if fForward^ in ['e','E'] then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_UREAL;
|
||||
|
||||
if fForward^ in ['+','-'] then
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ in ['0'..'9'] then
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
while fForward^ in ['0'..'9'] do
|
||||
INC(fForward);
|
||||
end
|
||||
else
|
||||
raise EwpLex.Create('Expected +,-,0..9 in exponential part');
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
// <,<=,<>
|
||||
'<':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_LT;
|
||||
|
||||
if fForward^ in ['=','>'] then
|
||||
begin
|
||||
case fForward^ of
|
||||
'=': ttype := TT_LE;
|
||||
'>': ttype := TT_NE;
|
||||
end;
|
||||
|
||||
INC(fForward);
|
||||
end;
|
||||
end;
|
||||
|
||||
// >,>=
|
||||
'>':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_GT;
|
||||
|
||||
if fForward^ = '=' then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_GE;
|
||||
end;
|
||||
end;
|
||||
|
||||
// :,:=
|
||||
':':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_COLON;
|
||||
|
||||
if fForward^ = '=' then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_ASSIGN
|
||||
end
|
||||
end;
|
||||
|
||||
// .,..
|
||||
'.':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_DOT;
|
||||
|
||||
if fForward^ = '.' then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_RANGE;
|
||||
end
|
||||
end;
|
||||
|
||||
// string
|
||||
'''':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_CHAR;
|
||||
|
||||
while true do
|
||||
begin
|
||||
if fForward^ in [#10,#13,#0] then
|
||||
raise EwpLex.Create('Newline/EOF found in string');
|
||||
|
||||
if fForward^ = '''' then
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ = ''''
|
||||
then INC(fForward)
|
||||
else break
|
||||
|
||||
end
|
||||
else
|
||||
INC(fForward)
|
||||
end;
|
||||
end;
|
||||
|
||||
// /,//
|
||||
'/':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_SLASH;
|
||||
|
||||
if fForward^ = '/' then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_COMMENT;
|
||||
|
||||
while not (fForward^ in [#13,#10,#0]) do
|
||||
INC(fForward);
|
||||
end
|
||||
end;
|
||||
|
||||
// comment
|
||||
'{':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_COMMENT;
|
||||
|
||||
while not (fForward^ in ['}',#0]) do
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ = #0
|
||||
then raise EwpLex.Create('EOF reached in comment')
|
||||
else INC( fForward);
|
||||
end;
|
||||
|
||||
'(':
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_LPAREN;
|
||||
|
||||
if fForward^ = '*' then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_COMMENT;
|
||||
|
||||
while true do
|
||||
begin
|
||||
if fForward^ = #0 then
|
||||
raise EwpLex.Create('EOF reached in comment');
|
||||
|
||||
if fForward^ = '*' then
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ = ')' then
|
||||
begin
|
||||
INC(fForward);
|
||||
break;
|
||||
end;
|
||||
end
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
// hex number
|
||||
'$':
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ in ['0'..'9','a'..'f','A'..'F'] then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_HEX;
|
||||
|
||||
while fForward^ in ['0'..'9','a'..'f','A'..'F'] do
|
||||
INC(fForward);
|
||||
end
|
||||
|
||||
else
|
||||
raise EwpLex.Create('Expected hexadecimal digit');
|
||||
end;
|
||||
|
||||
'%':
|
||||
begin
|
||||
INC(fForward);
|
||||
|
||||
if fForward^ in ['0'..'1'] then
|
||||
begin
|
||||
INC(fForward);
|
||||
ttype := TT_BIN;
|
||||
|
||||
while fForward^ in ['0'..'1'] do
|
||||
INC(fForward);
|
||||
end
|
||||
|
||||
else
|
||||
raise EwpLex.Create('Expected binary digit');
|
||||
end;
|
||||
|
||||
else
|
||||
case fForward^ of
|
||||
')': begin ttype := TT_RPAREN; INC(fForward) end;
|
||||
'[': begin ttype := TT_LBRACKET; INC(fForward) end;
|
||||
']': begin ttype := TT_RBRACKET; INC(fForward) end;
|
||||
|
||||
'*': begin ttype := TT_STAR; INC(fForward) end;
|
||||
'+': begin ttype := TT_PLUS; INC(fForward) end;
|
||||
'-': begin ttype := TT_MINUS; INC(fForward) end;
|
||||
|
||||
'=': begin ttype := TT_EQ; INC(fForward) end;
|
||||
|
||||
'^': begin ttype := TT_PTR; INC(fForward) end;
|
||||
';': begin ttype := TT_SEMI; INC(fForward) end;
|
||||
',': begin ttype := TT_COMMA; INC(fForward) end;
|
||||
'$': begin ttype := TT_DOLLAR; INC(fForward) end;
|
||||
|
||||
'@': begin ttype := TT_AT; INC(fForward) end;
|
||||
'#': begin ttype := TT_SHARP; INC(fForward) end;
|
||||
|
||||
#9 : begin ttype := TT_SKIP; INC(fForward) end;
|
||||
#10: begin ttype := TT_SKIP; INC(fForward) end;
|
||||
#13: begin ttype := TT_SKIP; INC(fForward) end;
|
||||
#32: begin ttype := TT_SKIP; INC(fForward) end;
|
||||
|
||||
// EOF
|
||||
#0 : ttype := TT_EOF;
|
||||
|
||||
else
|
||||
raise EwpLex.Create('Invalid character '+fForward^);
|
||||
end
|
||||
end;
|
||||
|
||||
if ttype <> TT_SKIP then
|
||||
begin
|
||||
ttext := GetTokenText;
|
||||
result := MakeToken( ttext, ttype);
|
||||
fStart := fForward;
|
||||
|
||||
break;
|
||||
end;
|
||||
|
||||
fStart := fForward;
|
||||
end;
|
||||
end;
|
||||
|
||||
end.
|
||||
Reference in New Issue
Block a user