Files
bds.mr.dpg/src.lib/dpglib.Lookahead.pas
T
2026-01-03 18:33:48 +01:00

374 lines
14 KiB
ObjectPascal

// ----------------------------------------------------------------------------
// This object holds all information needed to represent the lookahead for any
// particular lookahead computation for a single lookahead depth.
// Final lookahead information is a simple bit set, but intermediate stages
// need computation cycle and FOLLOW information.
//
// Concerning the "fCycle" variable.
// If lookahead is computed for a RuleEnd node, then computation is part of
// a FOLLOW cycle for this rule.
// If lookahead is computed for a RuleBlock node, the computation is part of
// a FIRST cycle to this rule.
//
// Concerning the "fEpsilon" variable. (epsilonDepth)
// This is not the depth, relative to the rule reference, at which epsilon was
// encountered. That value is:
//
// initial_k - epsilonDepth + 1
//
// Also, lookahead depths past rule ref for local follow are:
//
// initial_k - (initial_k - epsilonDepth)
//
// Used for rule references. If we try to compute look(k, ruleref) and there
// are fewer than k lookahead terminals before the end of the rule,
// epsilon will be returned (we don't want to pass the end of the rule).
// We must track when the lookahead got stuck. For example,
//
// a : b A B E F G;
// b : C ;
//
// LOOK(5, ref-to(b)) is {<EPSILON>} with depth = 4, which indicates that at
// 2 (5-4+1) tokens ahead, end of rule was reached.
// Therefore, the token at 4=5-(5-4) past rule ref b must be included in the
// set == F.
//
// The situation is complicated by the fact that a computation may hit the
// end of a rule at many different depths. For example,
//
// a : b A B C ;
// b : E F // epsilon depth of 1 relative to initial k=3
// | G // epsilon depth of 2
// ;
//
// Here, LOOK(3,ref-to(b)) returns epsilon, but the depths are {1, 2};
// i.e., 3-(3-1) and 3-(3-2). Those are the lookahead depths past the rule
// ref needed for the local follow.
//
// This is an empty set unless an epsilon is created.
// ----------------------------------------------------------------------------
unit dpglib.Lookahead;
interface
uses
dpgrtl.types,
dpglib.types;
type
// ----------------------------------------------------------------------------
// Reference-counted implementation of ILookahead: one lookahead result for a
// single depth, made of a token set, an epsilon flag, the set of depths at
// which epsilon was recorded, and an optional cycle name.
// ----------------------------------------------------------------------------
TLookahead = class( TInterfacedObject, ILookahead)
protected
// ------------------------------------------------------------
// Actual 'bitset' of the lookahead (the tokens in this depth).
// ------------------------------------------------------------
fLaSet : TByteSet;
// ------------------------------------------------------------
// What 'k' values were being computed when end of rule hit?
// Stays empty unless something stores depths into it.
// ------------------------------------------------------------
fEpsilon : TByteSet;
// ------------------------------------------------------------
// Does this lookahead depth include Epsilon token type? This
// is used to avoid having a bit in the set for Epsilon as it
// conflicts with parsing binary files.
// ------------------------------------------------------------
fHasEpsilon : boolean;
// ------------------------------------------------------------
// Is this computation part of a computation cycle? Holds the
// cycle's name; the empty string means "no cycle".
// ------------------------------------------------------------
fCycle : AnsiString;
// ----------------------------------------------------------------------
// ILookahead
//
// Plain accessors for the four fields, followed by the set operations.
// ----------------------------------------------------------------------
protected
function GetLaSet : TByteSet;
function GetEpsilon : TByteSet;
function GetHasEpsilon : boolean;
function GetCycle : AnsiString;
procedure SetLaSet( LaSet : TByteSet);
procedure SetEpsilon( Epsilon : TByteSet);
procedure SetHasEpsilon( HasEpsilon : boolean);
procedure SetCycle( Cycle : AnsiString);
// Returns a new lookahead holding the common tokens and the AND of both
// epsilon flags; epsilon depths and cycle are not carried over.
function Intersection( LA : ILookahead) : ILookahead;
// Merges another lookahead (tokens, epsilon flag, epsilon depths, and the
// cycle name if none is set yet) into this one.
procedure CombineWith( LA : ILookahead); overload;
// Adds the given tokens to this lookahead's token set only.
procedure CombineWith( LA : TByteSet); overload;
// True when the token set is empty and the epsilon flag is not set.
function IsNil : boolean;
// Returns a new TLookahead carrying a copy of all four fields.
function Clone : ILookahead;
// Human-readable dump; token names are used when a token manager is given,
// otherwise the set is rendered through CharSetToStr.
function AsString( pTM: ITokenManager=nil) : AnsiString;
// ----------------------------------------------------------------------
// Construction/destruction
//
// Every overload starts from an empty state and fills in only the part
// named by its argument; the ILookahead overload copies all four fields.
// ----------------------------------------------------------------------
public
constructor Create; overload;
constructor Create( La : ILookahead); overload;
constructor Create( LaSet : TByteSet); overload;
constructor Create( Int : integer); overload;
constructor Create( Cycle : AnsiString); overload;
end;
// Dynamic array of lookahead interface references.
TLookaheadArray = array of ILookahead;
var
// Shared lookahead instance owned by this unit: it is created in the unit's
// initialization section with an empty token set and HasEpsilon = true, and
// released in the finalization section.
nullLookahead : ILookahead;
implementation
uses
System.SysUtils,
dpglib.utils;
// @@@: Construction/destruction ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// Construction/destruction
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Constructor
//
// Builds an empty lookahead: no tokens, no recorded epsilon depths, epsilon flag cleared and no
// cycle name.
// ================================================================================================
constructor TLookahead.Create;
begin
  inherited Create;
  fCycle      := '';
  fHasEpsilon := false;
  fEpsilon    := [];
  fLaSet      := [];
end;
// ================================================================================================
// Constructor(ILookahead)
//
// Copy constructor: takes the token set, epsilon depths, epsilon flag and cycle name from La.
// La is dereferenced without a nil check, so it must be assigned.
// ================================================================================================
constructor TLookahead.Create( La: ILookahead);
begin
  inherited Create;
  fCycle      := La.Cycle;
  fHasEpsilon := La.HasEpsilon;
  fEpsilon    := La.Epsilon;
  fLaSet      := La.LaSet;
end;
// ================================================================================================
// Constructor(TByteSet)
//
// Builds a lookahead whose token set is LaSet; epsilon depths are empty, the epsilon flag is
// cleared and there is no cycle name.
// ================================================================================================
constructor TLookahead.Create( LaSet: TByteSet);
begin
  inherited Create;
  fCycle      := '';
  fHasEpsilon := false;
  fEpsilon    := [];
  fLaSet      := LaSet;
end;
// ================================================================================================
// Constructor(integer)
//
// Builds a lookahead whose token set contains the single element Int; epsilon depths are empty,
// the epsilon flag is cleared and there is no cycle name.
//
// NOTE(review): Int is placed into the set without an explicit range check here; presumably
// callers only pass values that are valid elements of TByteSet -- confirm.
// ================================================================================================
constructor TLookahead.Create( Int: integer);
begin
  inherited Create;
  fCycle      := '';
  fHasEpsilon := false;
  fEpsilon    := [];
  fLaSet      := [Int];
end;
// ================================================================================================
// Constructor(string)
//
// Builds an otherwise empty lookahead that only records the name of the computation cycle it
// belongs to.
// ================================================================================================
constructor TLookahead.Create( Cycle: AnsiString);
begin
  inherited Create;
  fCycle      := Cycle;
  fHasEpsilon := false;
  fEpsilon    := [];
  fLaSet      := [];
end;
// @@@: ILookahead implementation +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// ILookahead implementation
//
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ================================================================================================
// Get LaSet
//
// Returns the token set of this lookahead.
// ================================================================================================
function TLookahead.GetLaSet: TByteSet;
begin
  Exit( fLaSet);
end;
// ================================================================================================
// Get Epsilon
//
// Returns the set of depths at which epsilon was recorded.
// ================================================================================================
function TLookahead.GetEpsilon: TByteSet;
begin
  Exit( fEpsilon);
end;
// ================================================================================================
// Get HasEpsilon
//
// Returns the epsilon flag of this lookahead.
// ================================================================================================
function TLookahead.GetHasEpsilon: boolean;
begin
  Exit( fHasEpsilon);
end;
// ================================================================================================
// Get Cycle
//
// Returns the recorded cycle name; the empty string means no cycle is recorded.
// ================================================================================================
function TLookahead.GetCycle: AnsiString;
begin
  Exit( fCycle);
end;
// ================================================================================================
// Set LaSet
//
// Replaces the token set of this lookahead.
// ================================================================================================
procedure TLookahead.SetLaSet( LaSet: TByteSet);
begin
  Self.fLaSet := LaSet;
end;
// ================================================================================================
// Set Epsilon
//
// Replaces the set of recorded epsilon depths.
// ================================================================================================
procedure TLookahead.SetEpsilon( Epsilon: TByteSet);
begin
  Self.fEpsilon := Epsilon;
end;
// ================================================================================================
// Set HasEpsilon
//
// Sets or clears the epsilon flag.
// ================================================================================================
procedure TLookahead.SetHasEpsilon( HasEpsilon: boolean);
begin
  Self.fHasEpsilon := HasEpsilon;
end;
// ================================================================================================
// Set Cycle
//
// Replaces the recorded cycle name.
// ================================================================================================
procedure TLookahead.SetCycle( Cycle: AnsiString);
begin
  Self.fCycle := Cycle;
end;
// ================================================================================================
// Intersection
//
// Returns a new lookahead holding what this lookahead and LA have in common.
// Only the token set and the epsilon flag take part: the result's epsilon depths are empty and
// its cycle name is blank. LA is dereferenced without a nil check, so it must be assigned.
// ================================================================================================
function TLookahead.Intersection( LA: ILookahead) : ILookahead;
var
  common: TByteSet;
begin
  // Tokens present in both sets.
  common := fLaSet * LA.LaSet;
  result := TLookahead.Create( common);
  // Epsilon survives only when both sides carry it.
  result.HasEpsilon := fHasEpsilon and LA.HasEpsilon;
end;
// ================================================================================================
// CombineWith(ILookahead)
//
// Merges LA into this lookahead in place: token sets and epsilon depths are united, the epsilon
// flags are OR-ed, and LA's cycle name is adopted only when this lookahead has none yet.
// LA is dereferenced without a nil check, so it must be assigned.
// ================================================================================================
procedure TLookahead.CombineWith( LA: ILookahead);
begin
  // An already recorded cycle name wins over the incoming one.
  if Length( fCycle) = 0 then
    fCycle := LA.Cycle;

  // Once set, the flag stays set; otherwise take LA's flag.
  if not fHasEpsilon then
    fHasEpsilon := LA.HasEpsilon;

  fLaSet   := fLaSet + LA.LaSet;
  fEpsilon := fEpsilon + LA.Epsilon;
end;
// ================================================================================================
// CombineWith(TByteSet)
//
// Adds the tokens in LA to this lookahead's token set. The epsilon flag, epsilon depths and
// cycle name are left untouched.
// ================================================================================================
procedure TLookahead.CombineWith( LA: TByteSet);
begin
  fLaSet := LA + fLaSet;
end;
// ================================================================================================
// IsNil
//
// True when this lookahead carries nothing at all: the token set is empty and the epsilon flag
// is not set. Epsilon depths and the cycle name are not examined.
// ================================================================================================
function TLookahead.IsNil: boolean;
begin
  result := not ((fLaSet <> []) or fHasEpsilon);
end;
// ================================================================================================
// Clone
//
// Returns a new TLookahead built with the copy constructor, i.e. carrying this lookahead's token
// set, epsilon depths, epsilon flag and cycle name.
// ================================================================================================
function TLookahead.Clone: ILookahead;
var
  source: ILookahead;
begin
  // View this object through its interface so the ILookahead constructor overload is selected.
  source := self;
  result := TLookahead.Create( source);
end;
// ================================================================================================
// AsString
//
// Renders this lookahead as text for diagnostics.
//
//   pTM     Optional token manager. When assigned, the token set is rendered with
//           TokenSetToStr( fLaSet, pTM); when nil, with CharSetToStr( fLaSet).
//
//   result  The rendered set, followed by
//             '+ <epsilon>'          when the epsilon flag is set,
//             '; FOLLOW( <cycle>)'   when a cycle name is recorded,
//             '; depths=<d1>,<d2>..' when epsilon depths are recorded (ascending order).
// ================================================================================================
function TLookahead.AsString( pTM: ITokenManager): AnsiString;
var
  depths: AnsiString;
  i     : integer;
begin
  if pTM = nil then
    result := CharSetToStr( fLaSet)
  else
    result := TokenSetToStr( fLaSet, pTM);

  // Test the Boolean directly rather than comparing it with the literal True.
  if fHasEpsilon then
    result := result + '+ <epsilon>';

  if fCycle <> '' then
    result := result + '; FOLLOW( ' + fCycle + ')';

  if fEpsilon <> [] then
  begin
    depths := '';
    // Walk every value a Byte can hold (0..255) and list the ones that are members.
    for i := Low( Byte) to High( Byte) do
      if i in fEpsilon then
      begin
        // Comma goes between entries, never before the first one.
        if depths <> '' then
          depths := depths + ',';
        depths := depths + AnsiString( IntToStr( i));
      end;
    result := result + '; depths=' + depths;
  end;
end;
// ------------------------------------------------------------------------------------------------
// Unit start-up: create the shared nullLookahead instance. It starts as an empty lookahead and
// is then flagged as containing epsilon, so IsNil returns false for it even though its token set
// is empty. The flag must be set after the instance exists, so the statement order matters.
// ------------------------------------------------------------------------------------------------
initialization
nullLookahead := TLookahead.Create;
nullLookahead.HasEpsilon := true;
// ------------------------------------------------------------------------------------------------
// Unit shut-down: drop this unit's reference to the shared instance.
// ------------------------------------------------------------------------------------------------
finalization
nullLookahead := nil;
end.