unit wpLex; interface uses Classes, SysUtils, Generics.Collections; type TTokenType = ( TT_EOF, TT_SKIP, TT_COMMENT, TT_LPAREN, // ( TT_RPAREN, // ) TT_LBRACKET, // [ TT_RBRACKET, // ] TT_STAR, // * TT_SLASH, // / TT_PLUS, // + TT_MINUS, // - TT_LT, // < TT_LE, // <= TT_GT, // > TT_GE, // >= TT_EQ, // = TT_NE, // <> TT_COLON, // : TT_ASSIGN, // := TT_DOT, // . TT_RANGE, // .. TT_PTR, // ^ TT_COMMA, // , TT_SEMI, // ; TT_DOLLAR, // $ TT_AT, // @ TT_SHARP, // # TT_ID, TT_UINT, TT_UREAL, TT_CHAR, TT_STRING, TT_HEX, TT_BIN, LT_DO, LT_IF, LT_IN, LT_OF, LT_OR, LT_TO, LT_AND, LT_DIV, LT_END, LT_FOR, LT_MOD, LT_NIL, LT_NOT, LT_SET, LT_VAR, LT_XOR, LT_CASE, LT_ELSE, LT_FILE, LT_GOTO, LT_THEN, LT_TYPE, LT_USES, LT_WITH, LT_ARRAY, LT_BEGIN, LT_CONST, LT_LABEL, LT_UNTIL, LT_WHILE, LT_DOWNTO, LT_PACKED, LT_RECORD, LT_REPEAT, LT_PROGRAM, LT_FUNCTION, LT_PROCEDURE ); TTokenTypes = set of TTokenType; TBlah = set of byte; TToken = class TokenType : TTokenType; TokenText : AnsiString; TokenLine : integer; TokenColumn : integer; end; TTokenMap = TDictionary; TwpLex = class private fBuffer : PAnsiChar; fStart : PAnsiChar; fForward : PAnsiChar; fLiterals : TTokenMap; fTokenLine : integer; fTokenColumn: integer; private procedure InitLiterals; function CheckLiteral( TokenText : AnsiString; TokenType : TTokenType): TTokenType; function MakeToken( TokenText : AnsiString; TokenType : TTokenType): TToken; public function NextToken : TToken; public constructor Create( Stream: TStream; Length: Int64=-1); destructor Destroy; override; end; EwpLex = Exception; implementation uses Windows; { TwpLex } // @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Construction/destruction // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Constructor // ================================================================================================ constructor TwpLex.Create(Stream: TStream; Length: Int64); var size : Int64; token : TToken; begin inherited Create; InitLiterals; if Assigned(Stream) then begin if Length < 0 then size := Stream.Size - Stream.Position else size := Length; fBuffer := GetMemory(size+1); Stream.Read( fBuffer^, size); fStart := fBuffer; fForward := fBuffer; fBuffer[size] := #0; end; end; // ================================================================================================ // Destructor // ================================================================================================ destructor TwpLex.Destroy; begin FreeAndNil(fLiterals); inherited; end; // @@@: Internals +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Internals // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Make Token // ================================================================================================ function TwpLex.MakeToken(TokenText: AnsiString; TokenType: TTokenType): TToken; begin result := TToken.Create; result.TokenLine := 0; result.TokenColumn := 0; result.TokenType := TokenType; result.TokenText := TokenText; // if TokenType = TT_COMMENT // then result.TokenText := TokenText // else result.TokenText := UpperCase(TokenText); end; // ================================================================================================ // Init Literals // ================================================================================================ procedure TwpLex.InitLiterals; begin fLiterals := TTokenMap.Create; fLiterals.Add('do', LT_DO); fLiterals.Add('if', LT_IF); fLiterals.Add('in', LT_IN); fLiterals.Add('of', LT_OF); fLiterals.Add('or', LT_OR); fLiterals.Add('to', LT_TO); fLiterals.Add('and', LT_AND); fLiterals.Add('div', LT_DIV); fLiterals.Add('end', LT_END); fLiterals.Add('for', LT_FOR); fLiterals.Add('mod', LT_MOD); fLiterals.Add('nil', LT_NIL); fLiterals.Add('not', LT_NOT); fLiterals.Add('set', LT_SET); fLiterals.Add('var', LT_VAR); fLiterals.Add('xor', LT_XOR); fLiterals.Add('case', LT_CASE); fLiterals.Add('else', LT_ELSE); fLiterals.Add('file', LT_FILE); fLiterals.Add('goto', LT_GOTO); fLiterals.Add('then', LT_THEN); fLiterals.Add('type', LT_TYPE); fLiterals.Add('uses', LT_USES); fLiterals.Add('with', LT_WITH); fLiterals.Add('array', LT_ARRAY); fLiterals.Add('begin', LT_BEGIN); fLiterals.Add('const', LT_CONST); fLiterals.Add('label', LT_LABEL); fLiterals.Add('until', LT_UNTIL); fLiterals.Add('while', LT_WHILE); fLiterals.Add('downto', LT_DOWNTO); fLiterals.Add('packed', LT_PACKED); fLiterals.Add('record', LT_RECORD); fLiterals.Add('repeat', LT_REPEAT); fLiterals.Add('program', LT_PROGRAM); fLiterals.Add('function', LT_FUNCTION); fLiterals.Add('procedure', LT_PROCEDURE); end; // ================================================================================================ // Check Literal // ================================================================================================ function TwpLex.CheckLiteral(TokenText: AnsiString; TokenType: TTokenType): TTokenType; var ttype : TTokenType; begin if fLiterals.TryGetValue(TokenText, ttype) then result := ttype else result := TokenType end; // @@@: Interface +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // Interface // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ================================================================================================ // Next Token // ================================================================================================ function TwpLex.NextToken: TToken; function GetTokenText: AnsiString; begin SetLength(result, fForward-fStart); MoveMemory( @result[1], fStart, fForward-fStart); end; var ttext : AnsiString; ttype : TTokenType; begin result := nil; while true do begin result := nil; fForward := fStart; case fForward^ of // id 'a'..'z','A'..'Z','_': begin INC(fForward); ttype := TT_ID; while fForward^ in ['a'..'z','A'..'Z','0'..'9','_'] do INC(fForward); ttype := CheckLiteral( GetTokenText, ttype); end; // uint or ureal '0'..'9': begin INC(fForward); ttype := TT_UINT; while fForward^ in ['0'..'9'] do INC(fForward); // fractional part if fForward^ = '.' then begin INC(fForward); if fForward^ in ['0'..'9'] then begin INC(fForward); ttype := TT_UREAL; while fForward^ in ['0'..'9'] do INC(fForward); end else if fForward^ = '.' then ttype := TT_RANGE else raise EwpLex.Create('Expected 0..9 in fractional part'); end; // exponential part if ttype in [TT_UINT, TT_UREAL] then begin if fForward^ in ['e','E'] then begin INC(fForward); ttype := TT_UREAL; if fForward^ in ['+','-'] then INC(fForward); if fForward^ in ['0'..'9'] then begin INC(fForward); while fForward^ in ['0'..'9'] do INC(fForward); end else raise EwpLex.Create('Expected +,-,0..9 in exponential part'); end; end; end; // <,<=,<> '<': begin INC(fForward); ttype := TT_LT; if fForward^ in ['=','>'] then begin case fForward^ of '=': ttype := TT_LE; '>': ttype := TT_NE; end; INC(fForward); end; end; // >,>= '>': begin INC(fForward); ttype := TT_GT; if fForward^ = '=' then begin INC(fForward); ttype := TT_GE; end; end; // :,:= ':': begin INC(fForward); ttype := TT_COLON; if fForward^ = '=' then begin INC(fForward); ttype := TT_ASSIGN end end; // .,.. '.': begin INC(fForward); ttype := TT_DOT; if fForward^ = '.' then begin INC(fForward); ttype := TT_RANGE; end end; // string '''': begin INC(fForward); ttype := TT_CHAR; while true do begin if fForward^ in [#10,#13,#0] then raise EwpLex.Create('Newline/EOF found in string'); if fForward^ = '''' then begin INC(fForward); if fForward^ = '''' then INC(fForward) else break end else INC(fForward) end; end; // /,// '/': begin INC(fForward); ttype := TT_SLASH; if fForward^ = '/' then begin INC(fForward); ttype := TT_COMMENT; while not (fForward^ in [#13,#10,#0]) do INC(fForward); end end; // comment '{': begin INC(fForward); ttype := TT_COMMENT; while not (fForward^ in ['}',#0]) do INC(fForward); if fForward^ = #0 then raise EwpLex.Create('EOF reached in comment') else INC( fForward); end; '(': begin INC(fForward); ttype := TT_LPAREN; if fForward^ = '*' then begin INC(fForward); ttype := TT_COMMENT; while true do begin if fForward^ = #0 then raise EwpLex.Create('EOF reached in comment'); if fForward^ = '*' then begin INC(fForward); if fForward^ = ')' then begin INC(fForward); break; end; end end end end; // hex number '$': begin INC(fForward); if fForward^ in ['0'..'9','a'..'f','A'..'F'] then begin INC(fForward); ttype := TT_HEX; while fForward^ in ['0'..'9','a'..'f','A'..'F'] do INC(fForward); end else raise EwpLex.Create('Expected hexadecimal digit'); end; '%': begin INC(fForward); if fForward^ in ['0'..'1'] then begin INC(fForward); ttype := TT_BIN; while fForward^ in ['0'..'1'] do INC(fForward); end else raise EwpLex.Create('Expected binary digit'); end; else case fForward^ of ')': begin ttype := TT_RPAREN; INC(fForward) end; '[': begin ttype := TT_LBRACKET; INC(fForward) end; ']': begin ttype := TT_RBRACKET; INC(fForward) end; '*': begin ttype := TT_STAR; INC(fForward) end; '+': begin ttype := TT_PLUS; INC(fForward) end; '-': begin ttype := TT_MINUS; INC(fForward) end; '=': begin ttype := TT_EQ; INC(fForward) end; '^': begin ttype := TT_PTR; INC(fForward) end; ';': begin ttype := TT_SEMI; INC(fForward) end; ',': begin ttype := TT_COMMA; INC(fForward) end; '$': begin ttype := TT_DOLLAR; INC(fForward) end; '@': begin ttype := TT_AT; INC(fForward) end; '#': begin ttype := TT_SHARP; INC(fForward) end; #9 : begin ttype := TT_SKIP; INC(fForward) end; #10: begin ttype := TT_SKIP; INC(fForward) end; #13: begin ttype := TT_SKIP; INC(fForward) end; #32: begin ttype := TT_SKIP; INC(fForward) end; // EOF #0 : ttype := TT_EOF; else raise EwpLex.Create('Invalid character '+fForward^); end end; if ttype <> TT_SKIP then begin ttext := GetTokenText; result := MakeToken( ttext, ttype); fStart := fForward; break; end; fStart := fForward; end; end; end.