Files
bds.mr.dpg/src.lib/dpglib.LLkAnalyzer.pas
T
2026-01-03 18:33:48 +01:00

1551 lines
56 KiB
ObjectPascal

// ============================================================================
// LLkAnalyzer
//
// Checked....
// ============================================================================
unit dpglib.LLkAnalyzer;
interface
uses
System.Classes,
dpgrtl.types,
dpglib.Types,
dpglib.Lookahead;
type
  // --------------------------------------------------------------------------
  // TLLkAnalyzer
  //
  // Reference-counted implementation of ILLkAnalyzer. The Look overloads and
  // FOLLOW compute lookahead sets for grammar elements; the Deterministic
  // overloads check whether the alternatives of a block can be told apart
  // with at most fGrammar.MaxK symbols of lookahead.
  // --------------------------------------------------------------------------
  TLLkAnalyzer = class(TInterfacedObject, ILLkAnalyzer)
  protected
    DEBUG_ANALYZER : boolean;            // initialised to false by Create
    fCurrentBlock  : IAlternativeBlock;  // block under analysis; saved/restored by Deterministic and Look
    fTool          : ITool;              // receives warnings and panics
    fGrammar       : IGrammar;           // nil after Create; presumably assigned by SetGrammar
    fLexical       : boolean;            // true selects the lexer branches of Look and FOLLOW
    fCharFormatter : ICharFormatter;     // created as TDelphiCharFormatter by Create
  private
    function  altUsesWildcardDefault(pAlt: IAlternative): boolean;
    procedure removeCompetingPredictionSets(var pSet : TByteSet;
                                            pElem    : IALternativeElem);
    function  getAltLookahead(pBlk : IAlternativeBlock;
                              pAlt : integer;
                              pK   : integer): ILookahead;
    function  DeterminisiticImpliedPath(pBlk: IBlockWithImpliedExitPath): boolean;
  protected
    // ------------------------------------------------------------
    // ILLkAnalyzer methods
    // ------------------------------------------------------------
    procedure SetGrammar(pGrammar: IGrammar);

    // Determinism checks, one per block flavour.
    function Deterministic(pBlk: IAlternativeBlock): boolean; overload;
    function Deterministic(pBlk: IOneOrMoreBlock)  : boolean; overload;
    function Deterministic(pBlk: IZeroOrMoreBlock) : boolean; overload;

    // Lookahead at depth k, one overload per grammar element type.
    function Look(k: integer; pElem: IActionElem)        : ILookahead; overload;
    function Look(k: integer; pBlk : IAlternativeBlock)  : ILookahead; overload;
    function Look(k: integer; pEnd : IBlockEndElem)      : ILookahead; overload;
    function Look(k: integer; pAtom: ICharLiteralElem)   : ILookahead; overload;
    function Look(k: integer; pElem: ICharRangeElem)     : ILookahead; overload;
    function Look(k: integer; pAtom: IGrammarAtom)       : ILookahead; overload;
    function Look(k: integer; pBlk : IOneOrMoreBlock)    : ILookahead; overload;
    function Look(k: integer; pBlk : INMBlock)           : ILookahead; overload;
    function Look(k: integer; pElem: IRuleBlock)         : ILookahead; overload;
    function Look(k: integer; pEnd : IRuleEndElem)       : ILookahead; overload;
    function Look(k: integer; pElem: IRuleRefElem)       : ILookahead; overload;
    function Look(k: integer; pAtom: IStringLiteralElem) : ILookahead; overload;
    function Look(k: integer; pBlk : ISynPredBlock)      : ILookahead; overload;
    function Look(k: integer; pElem: ITokenRangeElem)    : ILookahead; overload;
    function Look(k: integer; pElem: ITreeElem)          : ILookahead; overload;
    function Look(k: integer; pElem: IWildCardElem)      : ILookahead; overload;
    function Look(k: integer; pBlk : IZeroOrMoreBlock)   : ILookahead; overload;
    function Look(k: integer; pElem: AnsiString)         : ILookahead; overload;

    // Lookahead of whatever follows references to the rule owning pEnd.
    function FOLLOW(k: integer; pEnd: IRuleEndElem): ILookahead;
    function SubRuleCanBeInverted(pBlock   : IAlternativeBlock;
                                  pIsLexer : boolean): boolean;
  public
    constructor Create(pTool: ITool);
    destructor  Destroy; override;
  end;
implementation
uses
System.SysUtils,
dpglib.Messages,
dpglib.Utils,
dpglib.CodeGenerator,
dpglib.DelphiCharFormatter;
{ TLLkAnalyzer }
// ****************************************************************************
// Constructor/destructor
// ****************************************************************************
// ============================================================================
// Create
//
// Builds an analyzer that reports through pTool. No grammar and no current
// block are attached yet; lexical mode and debug output start switched off.
// ============================================================================
constructor TLLkAnalyzer.Create(pTool: ITool);
begin
  inherited Create;

  // Collaborators.
  fTool          := pTool;
  fCharFormatter := TDelphiCharFormatter.Create;

  // Nothing is being analysed yet.
  fGrammar       := nil;
  fCurrentBlock  := nil;

  // Flags start out cleared.
  fLexical       := false;
  DEBUG_ANALYZER := false;
end;
// ============================================================================
// Destroy
//
// Drops every interface reference held by the analyzer before handing over
// to the inherited destructor.
// ============================================================================
destructor TLLkAnalyzer.Destroy;
begin
  // Release order is kept as: current block, tool, grammar, formatter.
  fCurrentBlock  := nil;
  fTool          := nil;
  fGrammar       := nil;
  fCharFormatter := nil;

  inherited;
end;
// ****************************************************************************
// Internals
// ****************************************************************************
// ============================================================================
// ============================================================================
// Deterministic
//
// Is this block of alternatives LL(k)? Fills in the lookahead cache of the
// alternatives of pBlk along the way.
//
// Parameters:
//   pBlk - block whose alternatives are compared pairwise.
// Returns:
//   true when every pair of alternatives can be told apart with at most
//   fGrammar.MaxK symbols of lookahead, false otherwise.
//
// fCurrentBlock points at pBlk for the duration of the call and is restored
// on every exit path, including exceptions.
// ============================================================================
function TLLkAnalyzer.Deterministic( pBlk: IAlternativeBlock): boolean;
var
  k         : integer;
  i, j      : integer;
  l         : integer;
  nalts     : integer;
  save      : IAlternativeBlock;
  elem      : IAlternativeElem;
  zom       : IZeroOrMoreBlock;
  oom       : IOneOrMoreBlock;
  haveAmbig : boolean;
  p         : ILookahead;
  q         : ILookahead;
  r         : array of ILookahead;
  sets      : TInterfaceList;
  ai        : IAlternative;
  aj        : IAlternative;
  bei       : IBlockEndElem;
  bej       : IBlockEndElem;
begin
  result := true;
  nalts  := pBlk.Alternatives.Count;
  save   := fCurrentBlock;
  fCurrentBlock := pBlk;
  try
    pBlk.QueryInterface(IOneOrMoreBlock, oom);
    pBlk.QueryInterface(IZeroOrMoreBlock, zom);

    // ---------------------------------------------------------------
    // Don't allow nongreedy (...) blocks: greedy=false is only
    // accepted on (...)+ and (...)* loops.
    // ---------------------------------------------------------------
    if (not pBlk.Greedy) and (oom = nil) and (zom = nil) then
    begin
      fTool.Warning( MSG_W_INVNONGREEDY,
                     fGrammar.GrammarFile,
                     pBlk.Line,
                     pBlk.Column);
    end;

    // ---------------------------------------------------------------
    // SPECIAL CASE: only one alternative. We don't need to check the
    // determinism, but other code expects the lookahead cache to be
    // set for the single alt.
    // ---------------------------------------------------------------
    if nalts = 1 then
    begin
      elem := pBlk.Alternative[0].Head;
      fCurrentBlock.AltI := 0;
      if pBlk.Alternative[0].CacheSize < 2 then
        pBlk.Alternative[0].CacheSize := 2;
      pBlk.Alternative[0].Cache[1]       := elem.Look(1);
      pBlk.Alternative[0].LookaheadDepth := 1;
      result := true;
      exit;   // the finally block restores fCurrentBlock
    end;

    // ---------------------------------------------------------------
    // GENERAL CASE: compare every pair (i, j) with i < j.
    // r[1..MaxK] holds the per-depth intersections of the pair that
    // is currently being compared (index 0 is unused).
    // ---------------------------------------------------------------
    SetLength( r, fGrammar.MaxK + 1);
    for i := 0 to nalts - 1 do
    begin
      fCurrentBlock.AltI := i;
      for j := i + 1 to nalts - 1 do
      begin
        fCurrentBlock.AltJ        := j;
        fCurrentBlock.AnalyzisAlt := j;

        // ---------------------------------------------------------
        // Always attempt the minimum lookahead possible, then deepen
        // until the two alternatives no longer overlap or MaxK is
        // exhausted.
        // ---------------------------------------------------------
        k := 1;
        haveAmbig := true;
        while haveAmbig and (k <= fGrammar.MaxK) do
        begin
          haveAmbig := false;
          p := getAltLookahead( pBlk, i, k);
          q := getAltLookahead( pBlk, j, k);
          r[k] := p.Intersection(q);
          if not r[k].IsNil then
          begin
            haveAmbig := true;
            INC(k);
          end;
        end;

        ai := pBlk.Alternative[i];
        aj := pBlk.Alternative[j];

        if haveAmbig then
        begin
          // ------------------------------------------------------
          // Still overlapping at depth MaxK: both alternatives are
          // nondeterministic. What remains is deciding whether the
          // user gets a warning.
          // ------------------------------------------------------
          result := false;
          ai.LookaheadDepth := NONDETERMINISTIC;
          aj.LookaheadDepth := NONDETERMINISTIC;
          // bei/bej are non-nil when the alternative is empty, i.e.
          // its head is the block-end element.
          ai.Head.QueryInterface( IBlockEndElem, bei);
          aj.Head.QueryInterface( IBlockEndElem, bej);

          if ai.SynPred <> nil then
          begin
            // Alt i starts with a syntactic predicate. The lookahead
            // is still computed for code generation, but no message
            // is generated when comparing against alt j. Marking j
            // nondeterministic keeps it out of switch statements.
          end
          else if ai.SemPred <> '' then
          begin
            // Alt i starts with a semantic predicate: same treatment,
            // no message.
          end
          else if altUsesWildcardDefault( aj) then
          begin
            // Alt j is the wildcard default (as reported by
            // altUsesWildcardDefault): no ambiguity warning.
          end
          else if (not pBlk.WarnFollowAmbig) and
                  ((bei <> nil) or (bej <> nil)) then
          begin
            // warnWhenFollowAmbig=false and one of the alts is empty
            // (points immediately at the end block): warning is off.
          end
          else if not pBlk.GenAmbigWarnings then
          begin
            // generateAmbigWarnings is off for this block.
          end
          else if pBlk.Greedy and pBlk.GreedySet and
                  (((bei = nil) and (bej <> nil)) or
                   ((bei <> nil) and (bej = nil))) then
          begin
            // greedy=true was set explicitly and exactly *one* of
            // the two alts is empty: warning is off.
          end
          else
          begin
            // ----------------------------------------------------
            // We have no choice but to report a nondeterminism.
            // The list is freed in a finally block so it does not
            // leak if the tool raises while reporting.
            // ----------------------------------------------------
            sets := TInterfaceList.Create;
            try
              for l := 1 to fGrammar.MaxK do
                sets.Add( r[l]);
              fTool.WarnAltAmbiguity( fGrammar,
                                      pBlk,
                                      fLexical,
                                      fGrammar.MaxK,
                                      sets,
                                      i,
                                      j);
            finally
              FreeAndNil( sets);
            end;
          end;
        end
        else
        begin
          // Depth k separates i and j: raise each alternative's
          // lookahead depth to at least k.
          if ai.LookaheadDepth < k then ai.LookaheadDepth := k;
          if aj.LookaheadDepth < k then aj.LookaheadDepth := k;
        end;
      end;
    end;
  finally
    fCurrentBlock := save;
  end;
end;
// ============================================================================
// ============================================================================
// Deterministic (...)+
//
// Checks a (...)+ block in two steps: first the alternatives against each
// other, then each alternative against the implied exit path. Both steps
// always run because each one fills in lookahead caches.
//
// Returns true only when both checks report determinism.
// ============================================================================
function TLLkAnalyzer.Deterministic(pBlk: IOneOrMoreBlock): boolean;
var
  lOuterBlock : IAlternativeBlock;
  lAltsOk     : boolean;
  lExitOk     : boolean;
begin
  lOuterBlock   := fCurrentBlock;
  fCurrentBlock := pBlk;

  // Step 1: the alternatives among themselves.
  lAltsOk := Deterministic( pBlk as IAlternativeBlock);

  // Step 2: what follows the block must not conflict with the lookahead
  // of the (...)+ block itself.
  lExitOk := DeterminisiticImpliedPath( pBlk);

  fCurrentBlock := lOuterBlock;
  result := lAltsOk and lExitOk;
end;
// ============================================================================
// ============================================================================
// Deterministic (...)*
//
// Checks a (...)* block in two steps: first the alternatives against each
// other, then each alternative against the implied exit path. Both steps
// always run because each one fills in lookahead caches.
//
// Returns true only when both checks report determinism.
// ============================================================================
function TLLkAnalyzer.Deterministic(pBlk: IZeroOrMoreBlock): boolean;
var
  lOuterBlock : IAlternativeBlock;
  lAltsOk     : boolean;
  lExitOk     : boolean;
begin
  lOuterBlock   := fCurrentBlock;
  fCurrentBlock := pBlk;

  // Step 1: the alternatives among themselves.
  lAltsOk := Deterministic( pBlk as IAlternativeBlock);

  // Step 2: what follows the block must not conflict with the lookahead
  // of the (...)* block itself.
  lExitOk := DeterminisiticImpliedPath( pBlk);

  fCurrentBlock := lOuterBlock;
  result := lAltsOk and lExitOk;
end;
// ============================================================================
// ============================================================================
// DeterminisiticImpliedPath
//
// Is this (...)* or (...)+ LL(k) with respect to its exit branch?
//
// Every alternative of pBlk is compared with the lookahead of what follows
// the block (pBlk.Next). The follow sets are stored in pBlk.ExitCache.
//
// Parameters:
//   pBlk - loop block with an implied exit path. The caller must already
//          have made it the current block (fCurrentBlock is written to).
// Returns:
//   true when every alternative can be told apart from the exit branch
//   with at most fGrammar.MaxK symbols of lookahead.
// ============================================================================
function TLLkAnalyzer.DeterminisiticImpliedPath( pBlk: IBlockWithImpliedExitPath): boolean;
var
  k         : integer;
  alt       : IAlternative;
  nalts     : integer;
  i         : integer;
  l         : integer;
  be        : IBlockEndElem;
  p         : ILookahead;
  r         : array of ILookahead;
  sets      : TInterfaceList;
  haveAmbig : boolean;
  follow    : ILookahead;
begin
  result := true;
  nalts  := pBlk.Alternatives.Count;
  fCurrentBlock.AltJ := -1;

  // r[1..MaxK] holds the per-depth intersections of the alternative being
  // checked (index 0 is unused). The size never changes, so allocate once.
  SetLength( r, fGrammar.MaxK + 1);

  for i := 0 to nalts - 1 do
  begin
    alt := pBlk.Alternative[i];

    // be is non-nil when the alternative is empty, i.e. its head is the
    // block-end element. Empty alternatives in a loop are reported.
    if alt.Head.QueryInterface(IBlockEndElem, be) = S_OK then
    begin
      fTool.Warning( MSG_W_INVEMPTYALT,
                     fGrammar.GrammarFile,
                     pBlk.Line,
                     pBlk.Column);
    end;

    // ------------------------------------------------------------
    // Assume each alternative is LL(1) with the exit branch, then
    // deepen until alternative i and the exit branch no longer
    // overlap or MaxK is exhausted.
    // ------------------------------------------------------------
    k := 1;
    fCurrentBlock.AltI := i;
    haveAmbig := true;
    while haveAmbig and (k <= fGrammar.MaxK) do
    begin
      haveAmbig := false;
      follow := pBlk.Next.Look(k);
      pBlk.ExitCache[k] := follow;
      p := getAltLookahead( pBlk, i, k);
      r[k] := follow.Intersection(p);
      if not r[k].IsNil then
      begin
        haveAmbig := true;
        INC(k);
      end;
    end;

    if haveAmbig then
    begin
      result := false;
      alt.LookaheadDepth := NONDETERMINISTIC;
      pBlk.ExitDepth     := NONDETERMINISTIC;

      if not pBlk.WarnFollowAmbig then
      begin
        // warnWhenFollowAmbig=false: warning is off.
      end
      else if not pBlk.GenAmbigWarnings then
      begin
        // generateAmbigWarnings is off for this block.
      end
      else if pBlk.Greedy and pBlk.GreedySet and (be = nil) then
      begin
        // greedy=true was set explicitly and the alt is not empty:
        // warning is off.
      end
      else if (not pBlk.Greedy) and (be = nil) then
      begin
        // greedy=false: warning is off. Code generation has to add an
        // "if FOLLOW break" block to compensate.
        //
        // NOTE(review): the commented-out reference code that used to
        // sit here warned "nongreedy block may exit incorrectly due to
        // limitations of linear approximate lookahead (first k-1 sets
        // in lookahead not singleton)" when
        // lookaheadEquivForApproxAndFullAnalysis(exitCache, maxk)
        // failed, e.g. for ('a'..'d')+ ("ad"|"cb"). That check is not
        // ported.
      end
      else
      begin
        // ---------------------------------------------------------
        // No choice but to generate a warning. The list is freed in
        // a finally block so it does not leak if the tool raises.
        // ---------------------------------------------------------
        sets := TInterfaceList.Create;
        try
          for l := 1 to fGrammar.MaxK do
            sets.Add( r[l]);
          fTool.WarnAltExitAmbiguity( fgrammar,
                                      pBlk,
                                      fLexical,
                                      fGrammar.MaxK,
                                      sets,
                                      i);
        finally
          FreeAndNil(sets);
        end;
      end;
    end
    else
    begin
      // Depth k separates alt i from the exit branch: raise both depths
      // to at least k.
      if alt.LookaheadDepth < k then alt.LookaheadDepth := k;
      if pBlk.ExitDepth < k then pBlk.ExitDepth := k;
    end;
  end;
end;
// ============================================================================
// ============================================================================
// FOLLOW
//
// Compute the lookahead set of whatever follows references to the rule
// associated with the FOLLOW block.
//
// Parameters:
//   k    - lookahead depth.
//   pEnd - rule-end element of the rule whose FOLLOW is wanted.
// Returns:
//   A lookahead object. If the computation for (pEnd, k) is already in
//   progress, the result only names the rule as a cycle. Complete results
//   are cached in pEnd.Cache[k] and returned as clones.
//
// pEnd.Lock[k] is held while the references are walked and is released in
// a finally block, so a failing Look call cannot leave the node locked.
// ============================================================================
function TLLkAnalyzer.FOLLOW(k: integer; pEnd: IRuleEndElem): ILookahead;
var
  ts   : ITokenSymbol;
  rb   : IRuleBlock;
  rs   : IRuleSymbol;
  re   : IRuleEndElem;
  rr   : IRuleRefElem;
  lg   : ILexerGrammar;
  rule : AnsiString;
  i    : integer;
  q    : ILookahead;
begin
  // ---------------------------------------------------------------
  // What rule are we trying to compute FOLLOW of? Lexer rules are
  // looked up under their encoded name.
  // ---------------------------------------------------------------
  pEnd.Block.QueryInterface(IRuleBlock, rb);
  if fLexical then
    rule := TCodeGenerator.encodeLexerRuleName( rb.RuleName)
  else
    rule := rb.RuleName;

  // ---------------------------------------------------------------
  // Are we in the midst of computing this FOLLOW already? Then only
  // a cycle marker naming the rule can be returned.
  // ---------------------------------------------------------------
  if pEnd.Lock[k] then
  begin
    result := TLookahead.Create( rule);
    exit;
  end;

  // ---------------------------------------------------------------
  // Check to see if there is a cached value.
  // ---------------------------------------------------------------
  if pEnd.Cache[k] <> nil then
  begin
    // ------------------------------------------------------------
    // If the cache is a complete computation then simply return it.
    // ------------------------------------------------------------
    if pEnd.Cache[k].Cycle = '' then
    begin
      result := pEnd.Cache[k].clone;
      exit;
    end;

    // ------------------------------------------------------------
    // A cache entry exists, but it is a reference to a cyclic
    // computation. Locate the rule end of the rule it refers to.
    // ------------------------------------------------------------
    ts := fGrammar.Symbol[pEnd.Cache[k].Cycle];
    ts.QueryInterface( IRuleSymbol, rs);
    re := rs.Block.EndElem;

    if re.Cache[k] = nil then
    begin
      // ---------------------------------------------------------
      // The other entry may not exist because it is still being
      // computed. Return the cycle; that's all we can do for now.
      // ---------------------------------------------------------
      result := pEnd.Cache[k].clone;
    end
    else
    begin
      // ---------------------------------------------------------
      // Replace this cache entry with the entry from the
      // referenced computation. Eventually, this percolates a
      // complete (no cycle reference) cache entry to this node
      // (or at least gets it closer and closer). This is not
      // crucial, but makes cache lookup faster as we might have
      // to look up lots of cycle references before finding a
      // complete reference.
      // ---------------------------------------------------------
      pEnd.Cache[k] := re.Cache[k].clone;
      result := re.Cache[k].clone;
    end;
    exit;
  end;

  pEnd.Lock[k] := true;
  try
    result := TLookahead.Create;
    ts := fGrammar.Symbol[rule];
    ts.QueryInterface( IRuleSymbol, rs);

    // ---------------------------------------------------------------
    // Walk the list of references to this rule to compute FOLLOW.
    // ---------------------------------------------------------------
    for i := 0 to rs.ReferenceCount - 1 do
    begin
      rr := rs.Reference[i];
      q  := rr.Next.Look(k);

      // ------------------------------------------------------------
      // If there is a cycle and the cycle is to the rule for this
      // end block, you have a cycle to yourself. Remove the cycle
      // indication -- the lookahead is complete.
      // ------------------------------------------------------------
      if q.Cycle = rule then
        q.Cycle := '';

      // ------------------------------------------------------------
      // Add the lookahead into the current FOLLOW computation set.
      // ------------------------------------------------------------
      result.CombineWith( q);
    end;
  finally
    pEnd.Lock[k] := false;
  end;

  // ---------------------------------------------------------------
  // If no rules follow this, it can be a start symbol or called by
  // a start symbol. Set the follow to be end of file.
  // ---------------------------------------------------------------
  if (result.LaSet = []) and (result.Cycle = '') then
  begin
    // ------------------------------------------------------------
    // Lexical grammars use Epsilon to indicate that the end of rule
    // has been hit. EOF would be misleading; any character can
    // follow a token rule, not just EOF as in a parser grammar
    // (where a start symbol is followed by EOF). There is no
    // sequence info in a lexer between tokens to indicate what is
    // the last token to be seen.
    // ------------------------------------------------------------
    if fGrammar.QueryInterface(ILexerGrammar, lg) = S_OK then
      result.HasEpsilon := true
    else
      result.LaSet := result.LaSet + [TT_EOF]
  end;

  // ---------------------------------------------------------------
  // Cache the result of the FOLLOW computation.
  // ---------------------------------------------------------------
  pEnd.Cache[k] := result.clone;
end;
// ============================================================================
// ============================================================================
// getAltLookahead
//
// Returns a clone of the depth-pK lookahead of alternative pAlt of pBlk.
// The set is computed from the head element of the alternative the first
// time it is requested and stored in the cache of the alternative.
// ============================================================================
function TLLkAnalyzer.getAltLookahead( pBlk : IAlternativeBlock;
                                       pAlt : integer;
                                       pK   : integer): ILookahead;
var
  lAlternative : IAlternative;
  lHead        : IAlternativeElem;
begin
  lAlternative := pBlk.Alternative[pAlt];
  lHead        := lAlternative.Head;

  // Fill the cache slot on first use.
  if lAlternative.Cache[pK] = nil then
    lAlternative.Cache[pK] := lHead.Look( pK);

  // Callers get a private copy, never the cached instance itself.
  result := lAlternative.Cache[pK].clone;
end;
// ============================================================================
// ============================================================================
// Look (Action)
//
// This overload contributes nothing of its own: the result is the depth-k
// lookahead of the element that follows pElem.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IActionElem): ILookahead;
var
  lFollowing : ILookahead;
begin
  lFollowing := pElem.Next.Look(k);
  result     := lFollowing;
end;
// ============================================================================
// ============================================================================
// Look (AlternativeBlock)
//
// The lookahead of a block is the union of the depth-k lookahead of the
// head of each alternative. For an inverted block (IsNot) that
// SubRuleCanBeInverted accepts, the depth-1 set is complemented against
// the character vocabulary (lexer) or the user token range (parser).
//
// fCurrentBlock points at pBlk during the computation and is put back
// afterwards.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IAlternativeBlock): ILookahead;
var
  lOuterBlock : IAlternativeBlock;
  lAltCount   : integer;
  lAltIdx     : integer;
begin
  lOuterBlock   := fCurrentBlock;
  fCurrentBlock := pBlk;
  result        := TLookahead.Create;

  // Union over all alternatives. The analysis alt must be set before
  // each head is examined.
  lAltCount := pBlk.Alternatives.Count;
  lAltIdx   := 0;
  while lAltIdx < lAltCount do
  begin
    fCurrentBlock.AnalyzisAlt := lAltIdx;
    result.CombineWith( pBlk.Alternative[lAltIdx].Head.Look(k));
    INC(lAltIdx);
  end;

  // Invert the lookahead set of a ~( ... ) block.
  if (k=1) and pBlk.IsNot and SubRuleCanBeInverted( pBlk, fLexical) then
  begin
    if not fLexical then
      result.LaSet := [TT_USER..fGrammar.TokenManager.MaxTokenType] - result.LaSet
    else
      result.LaSet := fGrammar.CharVocabulary - result.LaSet;
  end;

  fCurrentBlock := lOuterBlock;
end;
// ============================================================================
// Look (BlockEnd)
//
// Compute what follows this place-holder node and possibly what begins the
// associated loop unless the node is locked.
//
// If we hit the end of a loop, we have to include what tokens can begin the
// loop as well. If the node is locked, then we simply found an empty path
// through this subrule while analyzing it. If the node is not locked, then
// this node was hit during a FOLLOW operation and the FIRST of this block
// must be included in that lookahead computation.
//
// Parameters:
//   k    - lookahead depth.
//   pEnd - end element of a block; pEnd.Block is the block it closes.
// Returns:
//   An empty lookahead when pEnd.Lock[k] is set. Otherwise the start of the
//   loop (for (...)* and (...)+ blocks only) combined with either Epsilon
//   (syntactic predicate blocks) or the lookahead of what follows the block.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pEnd: IBlockEndElem): ILookahead;
var
  zom : IZeroOrMoreBlock;
  oom : IOneOrMoreBlock;
  spb : ISynPredBlock;
begin
  // ---------------------------------------------------------------
  // Computation in progress => the tokens we would have computed
  // (had we not been locked) will be included in the set by that
  // computation with the lock on this node.
  // ---------------------------------------------------------------
  if pEnd.Lock[k] then
  begin
    result := TLookahead.Create;
    exit;
  end;

  // ---------------------------------------------------------------
  // Hitting the end of a loop means you can see what begins the loop.
  // ---------------------------------------------------------------
  pEnd.Block.QueryInterface( IZeroOrMoreBlock, zom);
  pEnd.Block.QueryInterface( IOneOrMoreBlock, oom);
  if (zom <> nil) or (oom <> nil) then
  begin
    // ------------------------------------------------------------
    // Compute what can start the block, but lock the end node so
    // we don't do it twice in the same computation. The lock is
    // released even if the nested computation raises.
    // ------------------------------------------------------------
    pEnd.Lock[k] := true;
    try
      result := Look( k, pEnd.Block);
    finally
      pEnd.Lock[k] := false;
    end;
  end
  else
    result := TLookahead.Create;

  // ---------------------------------------------------------------
  // Syntactic predicates such as ( (A)? )=> have no follow per
  // se. We cannot accurately say what would be matched following
  // a syntactic predicate (you MIGHT be ok if you said it was
  // whatever followed the alternative predicted by the predicate).
  // Hence, (like end-of-token) we return Epsilon to indicate
  // "unknown lookahead."
  //
  // The test is made on the block this element closes, exactly like
  // the loop tests above; the end element itself is never a
  // syntactic predicate block.
  // ---------------------------------------------------------------
  if pEnd.Block.QueryInterface( ISynPredBlock, spb) = S_OK then
    result.HasEpsilon := true
  // ------------------------------------------------------------
  // Compute what can follow the block.
  // ------------------------------------------------------------
  else
    result.CombineWith( pEnd.Block.Next.Look(k));
end;
// ============================================================================
// ============================================================================
// Look (CharLiteral)
//
// Lexer only: a character literal in a non-lexer grammar is reported through
// fTool.Panic and yields nil.
//
//   k > 1  : the literal is skipped; the result is the depth k-1 lookahead
//            of the next element.
//   'c'    : the single character.
//   ~'c'   : the character vocabulary of the grammar, reduced by
//            removeCompetingPredictionSets and by the character itself.
//
// Known limitation carried over from the previous header:
// ### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
//
// NOTE: the exit path is meant NOT to be part of the exclusion set. E.g.,
//    ( 'a' | ~'a' )* 'b'
// should exit upon seeing a 'b' during the loop.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pAtom: ICharLiteralElem): ILookahead;
var
  lChars : TByteSet;
begin
  // Parser/treewalker case: invalid here, GrammarMaker should have
  // prevented it.
  if not fLexical then
  begin
    fTool.Panic('Character literal reference found in parser');
    result := nil;
    exit;
  end;

  // Skip until analysis hits k=1.
  if k > 1 then
  begin
    result := pAtom.Next.Look(k-1);
    exit;
  end;

  // Plain character literal, e.g. 'c'.
  if not pAtom.IsNot then
  begin
    result := TLookahead.Create( pAtom.TokenType);
    exit;
  end;

  // Inverted character literal, e.g. ~'c': start from the whole
  // vocabulary, let removeCompetingPredictionSets prune it, then drop the
  // character that is stated not to be in the set.
  lChars := fGrammar.CharVocabulary;
  removeCompetingPredictionSets( lChars, pAtom);
  lChars := lChars - [pAtom.TokenType];
  result := TLookahead.Create(lChars);
end;
// ============================================================================
// ============================================================================
// Look (CharRange)
//
// At depth 1 the lookahead is every character code from BeginChar through
// EndChar. At greater depths the range is skipped and the result is the
// depth k-1 lookahead of the next element.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: ICharRangeElem): ILookahead;
begin
  if k <= 1 then
    result := TLookahead.Create([ord(pElem.BeginChar)..ord(pElem.EndChar)])
  else
    result := pElem.Next.Look(k-1);
end;
// ============================================================================
// ============================================================================
// Look (GrammarAtom)
//
// Parser/treewalker only: a token reference in a lexer grammar is reported
// through fTool.Panic and yields nil.
//
//   k > 1  : the atom is skipped; the result is the depth k-1 lookahead of
//            the next element.
//   TOKEN  : the single token type.
//   ~TOKEN : TT_USER..MaxTokenType, reduced by removeCompetingPredictionSets
//            and by the token type itself.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pAtom: IGrammarAtom): ILookahead;
var
  lTokens : TByteSet;
begin
  // Lexer case: invalid here, GrammarMaker should have prevented it.
  if fLexical then
  begin
    fTool.Panic('Token reference found in lexer.');
    result := nil;
    exit;
  end;

  // Skip until analysis hits k=1.
  if k > 1 then
  begin
    result := pAtom.Next.Look( k-1);
    exit;
  end;

  // Plain token reference, e.g. HEXINT.
  if not pAtom.IsNot then
  begin
    result := TLookahead.Create( pAtom.TokenType);
    exit;
  end;

  // Inverted token reference, e.g. ~INTEGER: start from all user tokens,
  // let removeCompetingPredictionSets prune the set, then drop the token
  // that is stated not to be in the set.
  lTokens := [TT_USER..fGrammar.TokenManager.MaxTokenType];
  removeCompetingPredictionSets( lTokens, pAtom);
  lTokens := lTokens - [pAtom.TokenType];
  result := TLookahead.Create(lTokens);
end;
// ============================================================================
// ============================================================================
// Look (...)+
//
// The lookahead of a (...)+ block is the combined lookahead of all its
// alternatives, as computed by the IAlternativeBlock overload. Nothing is
// added here for what follows the block.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IOneOrMoreBlock): ILookahead;
var
  lAsAltBlock : IAlternativeBlock;
begin
  // The cast selects the IAlternativeBlock overload.
  lAsAltBlock := pBlk as IAlternativeBlock;
  result      := Look( k, lAsAltBlock);
end;
// ================================================================================================
// Look (...)@(n,m)
//
// Lookahead of a block with bounds pBlk.Low and pBlk.High:
//
//   k <= Low         : lookahead of the alternatives only.
//   Low < k <= High  : lookahead of the alternatives, combined with the
//                      lookahead of what follows the block at depths
//                      k-Low down to 1.
//   k > High         : only the lookahead of what follows the block, at
//                      depths k-Low down to 1.
// ================================================================================================
function TLLkAnalyzer.Look( k: integer; pBlk: INMBlock): ILookahead;
var
  lStep : integer;
begin
  // Seed the result: the alternatives take part while k is within either
  // bound; beyond that the result starts out empty.
  if (k <= pBlk.Low) or (k <= pBlk.High) then
    result := look(k, pBlk as IAlternativeBlock)
  else
    result := TLookahead.Create;

  // Beyond the lower bound, add what can follow the block.
  if k > pBlk.Low then
    for lStep := pBlk.Low+1 to k do
      result.CombineWith( pBlk.Next.Look(k-lStep+1));
end;
// ============================================================================
// ============================================================================
// Look (...)*
//
// The (...)* element is the combined lookahead of the alternatives and what can
// follow the loop.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IZeroOrMoreBlock): ILookahead;
var
  afterLoop : ILookahead;
begin
  // ---------------------------------------------------------------
  // Lookahead contributed by the alternatives inside the loop ...
  // ---------------------------------------------------------------
  result := Look( k, pBlk as IAlternativeBlock);
  // ---------------------------------------------------------------
  // ... merged with the lookahead of the element after the loop.
  // ---------------------------------------------------------------
  afterLoop := pBlk.Next.Look( k);
  result.CombineWith( afterLoop);
end;
// ============================================================================
// ============================================================================
// Look (RuleBlock)
//
// Combine the lookahead computed for each alternative. Lock the node so that
// no other computation may come back on itself -- infinite loop. This also
// implies infinite left-recursion in the grammar
// (or an error in this algorithm ;)).
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IRuleBlock): ILookahead;
var
  altBlock : IAlternativeBlock;
begin
  // ---------------------------------------------------------------
  // A rule block is analysed through the alternative-block overload;
  // this routine only re-types the reference and delegates.
  // ---------------------------------------------------------------
  altBlock := pElem as IAlternativeBlock;
  result   := Look( k, altBlock);
end;
// ============================================================================
// ============================================================================
// Look (RuleEnd)
//
// Lexical rules never compute follow. They set epsilon and the code generator
// generates code to check for any character. The code generator must remove the
// tokens used to predict any previous alts in the same block.
//
// When the last node of a rule is reached and noFOLLOW, it implies that a
// "local" FOLLOW will be computed after this call. I.e.,
//
// a : b A;
// b : B | ;
// c : b C;
//
// Here, when computing the look of rule b from rule a, we want only
// {B,EPSILON_TYPE} so that look(b A) will be {B,A} not {B,A,C}.
//
// If the end block is not locked and the FOLLOW is wanted, the algorithm must
// compute the lookahead of what follows references to this rule. If end block
// is locked, FOLLOW will return an empty set with a cycle to the rule
// associated with this end block.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pEnd: IRuleEndElem): ILookahead;
var
  suppressFollow : boolean;
begin
  suppressFollow := pEnd.noFOLLOW or fLexical;
  // ---------------------------------------------------------------
  // FOLLOW is wanted: hand over to the FOLLOW computation.
  // ---------------------------------------------------------------
  if not suppressFollow then
  begin
    result := FOLLOW( k, pEnd);
    exit;
  end;
  // ---------------------------------------------------------------
  // FOLLOW is suppressed (flag on the end element, or lexer grammar):
  // return an otherwise empty lookahead that records epsilon at
  // depth k.
  // ---------------------------------------------------------------
  result            := TLookahead.Create;
  result.HasEpsilon := true;
  result.Epsilon    := [k];
end;
// ============================================================================
// ============================================================================
// Look (RuleRef)
//
// When computing ruleref lookahead, we don't want the FOLLOW computation done
// if an empty path exists for the rule. The FOLLOW is too loose of a set...
// we want only to include the "local" FOLLOW or what can follow this
// particular ref to the node. In other words, we use context information to
// reduce the complexity of the analysis and strengthen the parser.
//
// The noFOLLOW flag is used as a means of restricting the FOLLOW to a
// "local" FOLLOW. This variable is orthogonal to the 'lock' variable that
// prevents infinite recursion. noFOLLOW does not care about what k is.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IRuleRefElem): ILookahead;
// ----------------------------------------------------------------------------
// Computes the lookahead at depth k for a reference to another rule.
//
//   k      : lookahead depth requested
//   pElem  : the rule reference element being analysed
//   result : lookahead of the referenced rule; wherever that rule reported
//            epsilon, the lookahead of the element following this reference
//            is merged in. An empty lookahead is returned (after reporting an
//            error through fTool) when the target rule is unknown or not
//            defined.
// ----------------------------------------------------------------------------
var
  ts     : ITokenSymbol;
  rs     : IRuleSymbol;
  rb     : IRuleBlock;
  re     : IRuleEndElem;
  se     : boolean;
  p      : ILookahead;
  q      : ILookahead;
  depths : TByteSet;
  i      : integer;

  // Rule name as it is shown in messages (lexer rule names are decoded).
  function DisplayName( const pRule : AnsiString): AnsiString;
  begin
    if fLexical then
      result := TCodeGenerator.decodeLexerRuleName( pRule)
    else
      result := pRule;
  end;

  // Reports that the rule referenced by pElem has no definition.
  procedure ReportUndefinedRule;
  begin
    fTool.Error(Format( MSG_E_RULENOTDEFINED, [DisplayName( pElem.TargetRule)]),
                fGrammar.GrammarFile,
                pElem.Line,
                pElem.Column);
  end;

begin
  rs := nil;
  ts := fGrammar.Symbol[pElem.TargetRule];
  if ts <> nil then
    ts.QueryInterface(IRuleSymbol, rs);
  // ---------------------------------------------------------------
  // The symbol does not exist in the grammar (or is not a rule).
  // ---------------------------------------------------------------
  if rs = nil then
  begin
    ReportUndefinedRule;
    result := TLookahead.Create;
    exit;
  end;
  // ---------------------------------------------------------------
  // The symbol exists but is not defined in the grammar.
  // ---------------------------------------------------------------
  if not rs.Defined then
  begin
    ReportUndefinedRule;
    result := TLookahead.Create;
    exit;
  end;
  rb := rs.Block;
  re := rb.EndElem;
  // ---------------------------------------------------------------
  // Go off to the rule and get the lookahead (w/o FOLLOW). The
  // noFOLLOW flag of the rule's end element is overridden only for
  // the duration of the call; try/finally guarantees that the saved
  // state is restored even when the nested analysis raises.
  // ---------------------------------------------------------------
  se := re.noFOLLOW;
  re.noFOLLOW := true;
  try
    p := Look( k, pElem.TargetRule);
  finally
    re.noFOLLOW := se;
  end;
  // ---------------------------------------------------------------
  // Check for infinite recursion. If a cycle is returned: trouble!!
  // The message receives the rule named by p.Cycle and the target
  // rule of this reference.
  // ---------------------------------------------------------------
  if p.Cycle <> '' then
  begin
    fTool.Error(Format( MSG_E_INFRECURSION, [DisplayName( p.Cycle),
                                             DisplayName( pElem.TargetRule)]),
                fGrammar.GrammarFile,
                pElem.Line,
                pElem.Column);
  end;
  // ---------------------------------------------------------------
  // Is the local FOLLOW required?
  // ---------------------------------------------------------------
  if p.HasEpsilon then
  begin
    // ------------------------------------------------------------
    // Remove epsilon and remember the depths that saw it.
    // ------------------------------------------------------------
    p.HasEpsilon := false;
    depths       := p.Epsilon;
    p.Epsilon    := [];
    // ------------------------------------------------------------
    // For each lookahead depth that saw epsilon, merge in the
    // lookahead of what follows this reference at that depth
    // (the former "k - (k - i)" is simply i).
    //
    // Note: any of these look() computations for local follow can
    //       set EPSILON in the set again if the end of this rule
    //       is found.
    // ------------------------------------------------------------
    for i := 0 to 255 do
    begin
      if i in depths then
      begin
        q := pElem.Next.Look( i);
        p.CombineWith( q);
      end;
    end;
  end;
  result := p;
end;
// ============================================================================
// ============================================================================
// Look (StringLiteral)
// ============================================================================
function TLLkAnalyzer.Look( k     : integer;
                            pAtom : IStringLiteralElem): ILookahead;
var
  textLen : integer;
begin
  // ---------------------------------------------------------------
  // Non-lexer grammar: the literal counts as one token.
  // ---------------------------------------------------------------
  if not fLexical then
  begin
    // Deeper than this element: ask whatever follows it
    if k > 1 then
    begin
      result := pAtom.Next.Look( k-1);
      exit;
    end;
    // Depth 1: the literal's token type, or every user token type
    // except it when the literal is negated
    if pAtom.IsNot then
      result := TLookahead.Create([TT_USER..fGrammar.TokenManager.MaxTokenType] - [pAtom.TokenType])
    else
      result := TLookahead.Create( pAtom.TokenType);
    exit;
  end;
  // ---------------------------------------------------------------
  // Lexer grammar: the literal covers one lookahead depth per
  // character of its processed text.
  // ---------------------------------------------------------------
  textLen := Length( pAtom.ProcessedAtomText);
  if k <= textLen then
    result := TLookahead.Create( ord(pAtom.ProcessedAtomText[k]))
  else
    result := pAtom.Next.Look( k - textLen);
end;
// ============================================================================
// ============================================================================
// Look (...)=>
//
// The lookahead of a (...)=> block is the lookahead of what follows the block.
// By definition, the syntactic predicate block defies static analysis (you
// want to try it out at run-time).
// The LOOK of (a)=>A B is A for LL(1)
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: ISynPredBlock): ILookahead;
var
  followLook : ILookahead;
begin
  // ---------------------------------------------------------------
  // The predicate block itself contributes nothing; the answer is
  // the lookahead of the element that follows it, at the same depth.
  // ---------------------------------------------------------------
  followLook := pBlk.Next.Look( k);
  result     := followLook;
end;
// ============================================================================
// ============================================================================
// Look (TT_XXX .. TT_YYY)
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: ITokenRangeElem): ILookahead;
begin
  // ---------------------------------------------------------------
  // At depth 1 (or below) the range itself is the lookahead set;
  // for deeper lookahead the range consumes one token and the rest
  // is asked of the following element.
  // ---------------------------------------------------------------
  if k <= 1 then
    result := TLookahead.Create([pElem.BeginToken..pElem.EndToken])
  else
    result := pElem.Next.Look( k-1);
end;
// ============================================================================
// ============================================================================
// Look (.)
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IWildCardElem): ILookahead;
var
  vocabulary : TByteSet;
begin
  // ---------------------------------------------------------------
  // Deeper than this element: the wildcard consumes one symbol and
  // the following element answers for the remaining depth.
  // ---------------------------------------------------------------
  if k > 1 then
  begin
    result := pElem.Next.Look( k - 1);
    exit;
  end;
  // ---------------------------------------------------------------
  // Start from the full vocabulary: characters for a lexer grammar,
  // user token types otherwise.
  // ---------------------------------------------------------------
  if fLexical then
    vocabulary := fGrammar.CharVocabulary
  else
    vocabulary := [TT_USER..fGrammar.TokenManager.MaxTokenType];
  { TODO : look(wildcard) delete 'removeCompeting...' if don't needed }
  removeCompetingPredictionSets( vocabulary, pElem);
  result := TLookahead.Create( vocabulary);
end;
// ============================================================================
// ============================================================================
// Look (rule name)
//
// Compute the combined lookahead for all productions of a rule. If the
// lookahead returns with epsilon, at least one epsilon path exists (one that
// consumes no tokens). The noFOLLOW flag being set for this endruleblk,
// indicates that a rule ref invoked this rule.
//
// Currently only look(RuleRef) calls this. There is no need for the code
// generator to call this.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: AnsiString): ILookahead;
// ----------------------------------------------------------------------------
// Returns the lookahead at depth k of the rule named pElem.
//
//   k      : lookahead depth requested
//   pElem  : name of the rule; it is looked up in fGrammar without any
//            nil check, so the caller must pass a name that resolves to a
//            rule symbol
//   result : a clone of the cached lookahead of the rule's block, or a
//            lookahead created from the rule name when the block is already
//            locked for depth k (the computation came back on itself)
// ----------------------------------------------------------------------------
var
  ts : ITokenSymbol;
  rs : IRuleSymbol;
  rb : IRuleBlock;
begin
  ts := fGrammar.Symbol[pElem];
  ts.QueryInterface( IRuleSymbol, rs);
  rb := rs.Block;
  // ---------------------------------------------------------------
  // Check for infinite recursion
  // ---------------------------------------------------------------
  if rb.Lock[k] then
  begin
    result := TLookahead.Create( pElem);
    exit;
  end;
  // ---------------------------------------------------------------
  // Well, the lookahead wasn't computed before, so do it. The lock
  // is released in a finally section so that an exception raised
  // during the computation cannot leave the rule locked, which
  // would make every later query report a cycle.
  // ---------------------------------------------------------------
  if rb.Cache[k] = nil then
  begin
    rb.Lock[k] := true;
    try
      rb.Cache[k] := Look( k, rb as IRuleBlock);
    finally
      rb.Lock[k] := false;
    end;
  end;
  // ---------------------------------------------------------------
  // Hand out a copy so callers cannot modify the cached lookahead
  // ---------------------------------------------------------------
  result := rb.Cache[k].clone;
end;
// ----------------------------------------------------------------------------
// Look (tree)
// ----------------------------------------------------------------------------
function TLLkAnalyzer.Look(k: integer; pElem: ITreeElem): ILookahead;
begin
  { TODO : look(tree) not implemented }
  // ---------------------------------------------------------------
  // Only the "skip past this element" case is handled. At depth 1
  // (or below) no lookahead is produced and nil is returned.
  // ---------------------------------------------------------------
  if k <= 1 then
    result := nil
  else
    result := pElem.Next.Look( k-1);
end;
// ============================================================================
// ============================================================================
// SetGrammar
// ============================================================================
procedure TLLkAnalyzer.SetGrammar(pGrammar: IGrammar);
var
  lexerView : ILexerGrammar;
begin
  // ---------------------------------------------------------------
  // Remember the grammar and note whether it is a lexer grammar,
  // i.e. whether it answers to the ILexerGrammar interface.
  // ---------------------------------------------------------------
  fGrammar := pGrammar;
  fLexical := (fGrammar.QueryInterface( ILexerGrammar, lexerView) = S_OK);
end;
// ============================================================================
// ============================================================================
// altUsesWildcardDefault
//
// Return true if someone used the '.' wildcard default idiom, which means the
// alternative has only two elems: wildcard-elem followed by block-end-elem.
// ============================================================================
function TLLkAnalyzer.altUsesWildcardDefault( pAlt: IAlternative): boolean;
var
  first    : IAlternativeElem;
  wildcard : IWildcardElem;
  blockEnd : IBlockEndElem;
begin
  wildcard := nil;
  blockEnd := nil;
  first    := pAlt.Head;
  // ---------------------------------------------------------------
  // First element must be a wildcard ...
  // ---------------------------------------------------------------
  first.QueryInterface( IWildcardElem, wildcard);
  // ---------------------------------------------------------------
  // ... and the element right after it must be the block end.
  // ---------------------------------------------------------------
  if first.Next <> nil then
    first.Next.QueryInterface( IBlockEndElem, blockEnd);
  result := (wildcard <> nil) and (blockEnd <> nil);
end;
// ============================================================================
// ============================================================================
// subRuleCanBeInverted
// ============================================================================
function TLLkAnalyzer.subRuleCanBeInverted( pBlock   : IAlternativeBlock;
                                            pIsLexer : boolean): boolean;
// ----------------------------------------------------------------------------
// Tells whether a subrule can be inverted, i.e. treated as a set.
//
//   pBlock   : the subrule to inspect
//   pIsLexer : true when the subrule belongs to a lexer grammar
//   result   : true only if the block is a plain (non-loop, non-predicate)
//              block without auto-generation type, and every alternative
//              consists of exactly one simple element followed by the block
//              end
// ----------------------------------------------------------------------------
var
  zom  : IZeroOrMoreBlock;
  oom  : IOneOrMoreBlock;
  spb  : ISynPredBlock;
  i    : integer;
  alt  : IAlternative;
  elt  : IAlternativeElem;
  cLit : ICharLiteralElem;
  cRng : ICharRangeElem;
  tRef : ITokenRefElem;
  tRng : ITokenRangeElem;
  sLit : IStringLiteralElem;
  be   : IBlockEndElem;
  simple : boolean;
begin
  result := false;
  // ---------------------------------------------------------------
  // Cannot invert (...)*, (...)+, (...)=>
  // ---------------------------------------------------------------
  pBlock.QueryInterface( IZeroOrMoreBlock, zom);
  pBlock.QueryInterface( IOneOrMoreBlock, oom);
  pBlock.QueryInterface( ISynPredBlock, spb);
  if (zom <> nil) or (oom <> nil) or (spb <> nil) then
    exit;
  // ---------------------------------------------------------------
  // Cannot invert an empty subrule
  // ---------------------------------------------------------------
  if pBlock.Alternatives.Count = 0 then
    exit;
  // ---------------------------------------------------------------
  // Cannot invert a block with an auto-generation type. The test
  // does not depend on the alternative, so it is done once here.
  // NOTE(review): this inspects the block's AutoGenType, not each
  // element's -- confirm that this is the intended check.
  // ---------------------------------------------------------------
  if pBlock.AutoGenType <> AUTOGEN_NONE then
    exit;
  // ---------------------------------------------------------------
  // The block must only contain alternatives with a single element,
  // where each element is a character, token, character range or
  // token range.
  // ---------------------------------------------------------------
  for i:=0 to pBlock.Alternatives.Count -1 do
  begin
    alt := pBlock.Alternative[i];
    // ------------------------------------------------------------
    // Cannot have anything interesting in the alternative ...
    // ------------------------------------------------------------
    if (alt.SynPred <> nil) or
       (alt.SemPred <> '')  or
       (alt.ExHandlerType <> '') then
    begin
      exit;
    end;
    // ------------------------------------------------------------
    // ... and there must be exactly one element: the head has to
    // be followed directly by the block end. A missing head or a
    // missing successor is "not invertible", not a crash.
    // ------------------------------------------------------------
    elt := alt.Head;
    if elt = nil then
      exit;
    if elt.Next = nil then
      exit;
    elt.Next.QueryInterface( IBlockEndElem, be);
    if be = nil then
      exit;
    // ------------------------------------------------------------
    // ... and that element must be simple. A string literal only
    // qualifies outside lexer grammars: there it stands for one
    // token, whereas in a lexer it is a sequence of characters.
    // ------------------------------------------------------------
    elt.QueryInterface( ICharLiteralElem,   cLit);
    elt.QueryInterface( ICharRangeElem,     cRng);
    elt.QueryInterface( ITokenRefElem,      tRef);
    elt.QueryInterface( ITokenRangeElem,    tRng);
    elt.QueryInterface( IStringLiteralElem, sLit);
    simple := (cLit <> nil) or
              (cRng <> nil) or
              (tRef <> nil) or
              (tRng <> nil) or
              ((sLit <> nil) and not pIsLexer);
    if not simple then
      exit;
  end;
  result := true;
end;
// ============================================================================
// ============================================================================
// removeCompetingPredictionSets
//
// Remove the prediction sets from preceding alternatives, but *only* if this
// element is the first element of the alternative. The class members
// 'fCurrentBlock' and 'fCurrentBlock.AnalyzisAlt' must be set correctly.
// ============================================================================
procedure TLLkAnalyzer.removeCompetingPredictionSets( var pSet  : TByteSet;
                                                          pElem : IAlternativeElem);
var
  altIdx    : integer;
  ownHead   : IGrammarElem;
  priorHead : IAlternativeElem;
begin
  // ---------------------------------------------------------------
  // Nothing to do unless pElem is the head of the alternative that
  // is currently being analysed: the subtraction below works with
  // depth-1 lookahead only.
  // ---------------------------------------------------------------
  ownHead := fCurrentBlock.Alternative[fCurrentBlock.AnalyzisAlt].Head;
  if pElem <> ownHead then
    exit;
  // ---------------------------------------------------------------
  // Take away the depth-1 lookahead set of the head of every
  // alternative that precedes the current one.
  // ---------------------------------------------------------------
  for altIdx := 0 to fCurrentBlock.AnalyzisAlt - 1 do
  begin
    priorHead := fCurrentBlock.Alternative[altIdx].Head;
    pSet      := pSet - priorHead.Look(1).LaSet;
  end;
end;
end.