// ============================================================================
// LLkAnalyzer
//
// Checked....
//
// Declares TLLkAnalyzer, the implementation of ILLkAnalyzer. The methods
// visible in this unit decide whether alternative blocks can be predicted
// with at most fGrammar.MaxK symbols of lookahead (Deterministic) and compute
// the lookahead sets of grammar elements (Look / FOLLOW).
// ============================================================================
unit dpglib.LLkAnalyzer;

interface

uses
  System.Classes,
  dpgrtl.types,
  dpglib.Types,
  dpglib.Lookahead;

type
  TLLkAnalyzer = class( TInterfacedObject, ILLkAnalyzer)
  protected
    // Debug switch; set to false by the constructor.
    DEBUG_ANALYZER  : boolean;
    // Block currently being analyzed; saved and restored by the
    // Deterministic(...) and Look(k, IAlternativeBlock) methods.
    fCurrentBlock   : IAlternativeBlock;
    // Tool used for reporting warnings, errors and panics.
    fTool           : ITool;
    // Grammar being analyzed (nil until assigned; presumably by SetGrammar,
    // whose body is not visible here -- TODO confirm).
    fGrammar        : IGrammar;
    // Selects the lexer code paths (character vocabulary, encoded rule
    // names). Initialized to false by the constructor.
    fLexical        : boolean;
    // Character formatter; a TDelphiCharFormatter created by the constructor.
    fCharFormatter  : ICharFormatter;

  private
    function  altUsesWildcardDefault( pAlt : IAlternative): boolean;
    procedure removeCompetingPredictionSets(var pSet: TByteSet; pElem: IALternativeElem);
    // Returns a clone of the cached depth-pK lookahead of alternative pAlt,
    // computing and caching it first when the cache slot is empty.
    function  getAltLookahead( pBlk : IAlternativeBlock; pAlt : integer; pK : integer): ILookahead;
    // Checks every alternative of a (...)* / (...)+ block against the exit
    // branch of the loop.
    function  DeterminisiticImpliedPath( pBlk : IBlockWithImpliedExitPath): boolean;

  protected
    // ------------------------------------------------------------
    // ILLkAnalyzer methods
    // ------------------------------------------------------------
    procedure SetGrammar( pGrammar: IGrammar);

    // Determinism checks; each returns true when the block is deterministic.
    function Deterministic( pBlk: IAlternativeBlock): boolean; overload;
    function Deterministic( pBlk: IOneOrMoreBlock)  : boolean; overload;
    function Deterministic( pBlk: IZeroOrMoreBlock) : boolean; overload;

    // Lookahead computation at depth k, one overload per element kind.
    function Look( k: integer; pElem: IActionElem)        : ILookahead; overload;
    function Look( k: integer; pBlk : IAlternativeBlock)  : ILookahead; overload;
    function Look( k: integer; pEnd : IBlockEndElem)      : ILookahead; overload;
    function Look( k: integer; pAtom: ICharLiteralElem)   : ILookahead; overload;
    function Look( k: integer; pElem: ICharRangeElem)     : ILookahead; overload;
    function Look( k: integer; pAtom: IGrammarAtom)       : ILookahead; overload;
    function Look( k: integer; pBlk: IOneOrMoreBlock)     : ILookahead; overload;
    function Look( k: integer; pBlk: INMBlock)            : ILookahead; overload;
    function Look( k: integer; pElem: IRuleBlock)         : ILookahead; overload;
    function Look( k: integer; pEnd: IRuleEndElem)        : ILookahead; overload;
    function Look( k: integer; pElem: IRuleRefElem)       : ILookahead; overload;
    function Look( k: integer; pAtom: IStringLiteralElem) : ILookahead; overload;
    function Look( k: integer; pBlk : ISynPredBlock)      : ILookahead; overload;
    function Look( k: integer; pElem: ITokenRangeElem)    : ILookahead; overload;
    function Look( k: integer; pElem: ITreeElem)          : ILookahead; overload;
    function Look( k: integer; pElem: IWildCardElem)      : ILookahead; overload;
    function Look( k: integer; pBlk: IZeroOrMoreBlock)    : ILookahead; overload;
    function Look( k: integer; pElem: AnsiString)         : ILookahead; overload;

    // Lookahead of whatever follows references to the rule owning pEnd.
    function FOLLOW( k: integer; pEnd: IRuleEndElem) : ILookahead;
    function SubRuleCanBeInverted( pBlock : IAlternativeBlock; pIsLexer : boolean): boolean;

  public
    constructor Create( pTool: ITool);
    destructor  Destroy; override;
  end;

implementation

uses
  System.SysUtils,
  dpglib.Messages,
  dpglib.Utils,
  dpglib.CodeGenerator,
  dpglib.DelphiCharFormatter;

{ TLLkAnalyzer }

// ****************************************************************************
// Constructor/destructor
// ****************************************************************************

// ============================================================================
// Constructor
//
// Stores the reporting tool, resets the analysis state (no grammar, no
// current block, fLexical = false) and creates the Delphi character formatter.
// ============================================================================
constructor TLLkAnalyzer.Create(pTool: ITool);
begin
  inherited Create;
  DEBUG_ANALYZER := false;
  fLexical       := false;
  fCurrentBlock  := nil;
  fGrammar       := nil;
  fTool          := pTool;
  fCharFormatter := TDelphiCharFormatter.Create;
end;

// ============================================================================
// Destructor
//
// Drops all interface references held by the analyzer before calling the
// inherited destructor.
// ============================================================================
destructor TLLkAnalyzer.Destroy;
begin
  fCurrentBlock  := nil;
  fTool          := nil;
  fGrammar       := nil;
  fCharFormatter := nil;
  inherited;
end;

// ****************************************************************************
// Internals
// ****************************************************************************
// ============================================================================
// ============================================================================
// Deterministic
//
// Is this block of alternatives LL(k)? Fill in alternative cache for this blk.
// Return true if the block is deterministic.
//
// Every pair of alternatives (i, j), i < j, is compared with increasing
// lookahead depth k = 1..fGrammar.MaxK until the two lookahead sets no longer
// intersect. If they still intersect at MaxK, both alternatives are marked
// NONDETERMINISTIC and, unless one of the suppression rules below applies,
// an ambiguity warning is reported through fTool.
//
// fCurrentBlock is set to pBlk during the analysis and is always restored on
// exit, even when a lookahead computation or the reporting tool raises.
// ============================================================================
function TLLkAnalyzer.Deterministic( pBlk: IAlternativeBlock): boolean;
var
  k         : integer;
  i,j       : integer;
  l         : integer;
  nalts     : integer;
  save      : IAlternativeBlock;
  elem      : IAlternativeElem;
  zom       : IZeroOrMoreBlock;
  oom       : IOneOrMoreBlock;
  haveAmbig : boolean;
  p         : ILookahead;
  q         : ILookahead;
  r         : array of ILookahead;   // r[k] = intersection found at depth k
  sets      : TInterfaceList;
  ai        : IAlternative;
  aj        : IAlternative;
  bei       : IBlockEndElem;
  bej       : IBlockEndElem;
begin
  result        := true;
  nalts         := pBlk.Alternatives.Count;
  save          := fCurrentBlock;
  fCurrentBlock := pBlk;
  try
    pBlk.QueryInterface(IOneOrMoreBlock, oom);
    pBlk.QueryInterface(IZeroOrMoreBlock, zom);

    // ---------------------------------------------------------------
    // Don't allow nongreedy (...) blocks
    // ---------------------------------------------------------------
    if (pBlk.Greedy = false) and (oom = nil) and (zom = nil) then
    begin
      fTool.Warning( MSG_W_INVNONGREEDY,
                     fGrammar.GrammarFile,
                     pBlk.Line,
                     pBlk.Column);
    end;

    // ---------------------------------------------------------------
    // SPECIAL CASE: only one alternative. We don't need to check the
    // determinism, but other code expects the lookahead cache to be
    // set for the single alt.
    // ---------------------------------------------------------------
    if nalts = 1 then
    begin
      elem := pBlk.Alternative[0].Head;
      fCurrentBlock.AltI := 0;
      if pBlk.Alternative[0].CacheSize < 2 then
        pBlk.Alternative[0].CacheSize := 2;
      pBlk.Alternative[0].Cache[1]       := elem.Look(1);
      pBlk.Alternative[0].LookaheadDepth := 1;
      result := true;
      exit;                         // the finally block restores fCurrentBlock
    end;

    // ---------------------------------------------------------------
    // GENERAL CASE
    // ---------------------------------------------------------------
    SetLength( r, fGrammar.MaxK +1);
    for i:=0 to nalts -1 do
    begin
      fCurrentBlock.AltI := i;
      for j:=i+1 to nalts-1 do
      begin
        fCurrentBlock.AltJ        := j;
        fCurrentBlock.AnalyzisAlt := j;

        // ---------------------------------------------------------
        // Always attempt minimum lookahead possible.
        // ---------------------------------------------------------
        k := 1;

        // ---------------------------------------------------------
        // Check to see if there is a lookahead depth that
        // distinguishes between alternatives i and j.
        // ---------------------------------------------------------
        haveAmbig := true;
        while haveAmbig and (k <= fGrammar.MaxK) do
        begin
          haveAmbig := false;
          p    := getAltLookahead( pBlk, i, k);
          q    := getAltLookahead( pBlk, j, k);
          r[k] := p.Intersection(q);
          if not r[k].IsNil then
          begin
            haveAmbig := true;
            INC(k);
          end;
        end;

        ai := pBlk.Alternative[i];
        aj := pBlk.Alternative[j];

        if haveAmbig then
        begin
          result := false;
          ai.LookaheadDepth := NONDETERMINISTIC;
          aj.LookaheadDepth := NONDETERMINISTIC;

          // bei/bej are non-nil when the alternative is empty, i.e. its
          // head is the block end element.
          ai.Head.QueryInterface( IBlockEndElem, bei);
          aj.Head.QueryInterface( IBlockEndElem, bej);

          // ------------------------------------------------------
          // if ith alt starts with a syntactic predicate, computing
          // the lookahead is still done for code generation, but
          // messages should not be generated when comparing against
          // alt j. Alternatives with syn preds that are unnecessary
          // do not result in syn pred try-blocks.
          // ------------------------------------------------------
          if ai.SynPred <> nil then
            // ---------------------------------------------------
            // The alt with the (...)=> block is nondeterministic
            // for sure. If the (...)=> conflicts with alt j, j is
            // nondeterministic. This prevents alt j from being in
            // any switch statements.
            // Move on to next alternative=>no possible ambiguity!
            // ---------------------------------------------------

          // ------------------------------------------------------
          // if ith alt starts with a semantic predicate, computing
          // the lookahead is still done for code generation, but
          // messages should not be generated when comparing against
          // alt j.
          // ------------------------------------------------------
          else if ai.SemPred <> '' then

          // ------------------------------------------------------
          // if jth alt is exactly the wildcard or wildcard root of
          // tree, no ambiguity warning is issued.
          // NOTE: alt i's lookahead is NOT removed from alt j's
          // lookahead here; this branch only suppresses the warning.
          // ------------------------------------------------------
          else if altUsesWildcardDefault( aj) then

          // ------------------------------------------------------
          // If the user specified warnWhenFollowAmbig=false, then we
          // can turn off this warning IF one of the alts is empty;
          // that is, it points immediately at the end block.
          // ------------------------------------------------------
          else if (pBlk.WarnFollowAmbig = false) and
                  ((bei <> nil) or (bej <> nil)) then

          // ------------------------------------------------------
          // If they have the generateAmbigWarnings option off for
          // the block then don't generate a warning.
          // ------------------------------------------------------
          else if pBlk.GenAmbigWarnings = false then

          // ------------------------------------------------------
          // If greedy=true and *one* empty alt shut off warning.
          // ------------------------------------------------------
          else if pBlk.Greedy and pBlk.GreedySet and
                  (((bei = nil) and (bej <> nil)) or
                   ((bei <> nil) and (bej = nil)))then

          // ------------------------------------------------------
          // We have no choice, but to report a nondeterminism.
          // ------------------------------------------------------
          else
          begin
            sets := TInterfaceList.Create;
            try
              for l:=1 to fGrammar.MaxK do
                sets.Add( r[l]);
              fTool.WarnAltAmbiguity( fGrammar,
                                      pBlk,
                                      fLexical,
                                      fGrammar.MaxK,
                                      sets,
                                      i,
                                      j);
            finally
              // Release the list even if the reporting call raises.
              FreeAndNil( sets);
            end;
          end;
        end
        else
        begin
          // k is the depth at which i and j were told apart; keep the
          // largest depth each alternative needs.
          if ai.LookaheadDepth < k then
            ai.LookaheadDepth := k;
          if aj.LookaheadDepth < k then
            aj.LookaheadDepth := k;
        end;
      end;
    end;
  finally
    fCurrentBlock := save;
  end;
end;

// ============================================================================
// ============================================================================
// Deterministic (...)+
//
// Is (...)+ block deterministic? Fill in alternative cache for this block.
// return true if the block is deterministic
//
// The alternatives are checked first, then each alternative is checked
// against the exit branch of the loop. fCurrentBlock is restored on exit,
// including when an exception propagates.
// ============================================================================
function TLLkAnalyzer.Deterministic(pBlk: IOneOrMoreBlock): boolean;
var
  save  : IAlternativeBlock;
  blkOK : boolean;
  det   : boolean;
begin
  save          := fCurrentBlock;
  fCurrentBlock := pBlk;
  try
    blkOK := Deterministic( pBlk as IAlternativeBlock);

    // ---------------------------------------------------------------
    // Block has been checked, now check that what follows does not
    // conflict with the lookahead of the (...)+ block.
    // ---------------------------------------------------------------
    det := DeterminisiticImpliedPath( pBlk);
  finally
    fCurrentBlock := save;
  end;
  result := blkOK and det;
end;

// ============================================================================
// ============================================================================
// Deterministic (...)*
//
// Is (...)* block LL(1)? Fill in alternative cache for this block.
// return true if the block is deterministic
//
// The alternatives are checked first, then each alternative is checked
// against the exit branch of the loop. fCurrentBlock is restored on exit,
// including when an exception propagates.
// ============================================================================
function TLLkAnalyzer.Deterministic(pBlk: IZeroOrMoreBlock): boolean;
var
  save  : IAlternativeBlock;
  blkOK : boolean;
  det   : boolean;
begin
  save          := fCurrentBlock;
  fCurrentBlock := pBlk;
  try
    blkOK := Deterministic( pBlk as IAlternativeBlock);

    // ---------------------------------------------------------------
    // Block has been checked, now check that what follows does not
    // conflict with the lookahead of the (...)* block.
    // ---------------------------------------------------------------
    det := DeterminisiticImpliedPath( pBlk);
  finally
    fCurrentBlock := save;
  end;
  result := blkOK and det;
end;

// ============================================================================
// ============================================================================
// DeterminisiticImpliedPath
//
// Is this (...)* or (...)+ LL(k)?
//
// Each alternative of pBlk is compared with the exit branch of the loop
// (the lookahead of pBlk.Next) at depths 1..fGrammar.MaxK. The exit lookahead
// is stored in pBlk.ExitCache[k]. Returns false when at least one alternative
// cannot be distinguished from the exit branch within MaxK symbols.
// Expects fCurrentBlock to have been set by the caller.
// ============================================================================
function TLLkAnalyzer.DeterminisiticImpliedPath( pBlk: IBlockWithImpliedExitPath): boolean;
var
  k         : integer;
  alt       : IAlternative;
  alts      : TInterfaceList;
  nalts     : integer;
  i         : integer;
  l         : integer;
  be        : IBlockEndElem;
  p         : ILookahead;
  r         : array of ILookahead;   // r[k] = intersection found at depth k
  sets      : TInterfaceList;
  haveAmbig : boolean;
  follow    : ILookahead;
begin
  result := true;
  alts   := pBlk.Alternatives;
  nalts  := alts.Count;
  fCurrentBlock.AltJ := -1;

  for i:=0 to nalts-1 do
  begin
    alt := pBlk.Alternative[i];

    // An empty alternative (head is the block end) inside a loop is
    // reported; "be" stays non-nil for the suppression rules below.
    if alt.Head.QueryInterface(IBlockEndElem,be) = S_OK then
    begin
      fTool.Warning( MSG_W_INVEMPTYALT,
                     fGrammar.GrammarFile,
                     pBlk.Line,
                     pBlk.Column);
    end;

    // ------------------------------------------------------------
    // Assume each alternative is LL(1) with exit branch.
    // ------------------------------------------------------------
    k := 1;

    // ------------------------------------------------------------
    // Check to see if there is a lookahead depth that distinguishes
    // between alternative i and the exit branch.
    // ------------------------------------------------------------
    SetLength( r, fGrammar.MaxK +1);
    fCurrentBlock.AltI := i;
    haveAmbig := true;
    while haveAmbig and (k <= fGrammar.MaxK) do
    begin
      haveAmbig := false;
      follow := pBlk.Next.Look(k);
      pBlk.ExitCache[k] := follow;
      p    := getAltLookahead( pBlk, i, k);
      r[k] := follow.Intersection(p);
      if not r[k].IsNil then
      begin
        haveAmbig := true;
        INC(k);
      end;
    end;

    if haveAmbig then
    begin
      result := false;
      alt.LookaheadDepth := NONDETERMINISTIC;
      pBlk.ExitDepth     := NONDETERMINISTIC;

      // ---------------------------------------------------------
      // If the user specified warnWhenFollowAmbig=false, then we
      // can turn off this warning.
      // ---------------------------------------------------------
      if not pBlk.WarnFollowAmbig then

      // ---------------------------------------------------------
      // If they have the generateAmbigWarnings option off for the block
      // then don't generate a warning.
      // ---------------------------------------------------------
      else if not pBlk.GenAmbigWarnings then

      // ---------------------------------------------------------
      // If greedy=true and alt not empty, shut off warning
      // ---------------------------------------------------------
      else if pBlk.Greedy and pBlk.GreedySet and (be = nil) then

      // ---------------------------------------------------------
      // If greedy=false then shut off warning...will have
      // to add "if FOLLOW break"
      // block during code gen to compensate for removal of warning.
      //
      // Not ported (was Java in the original comment): when FOLLOW
      // is not a single k-string (|set[k]| can be > 1 actually) the
      // user should be warned that the loop may terminate incorrectly,
      // for example ('a'..'d')+ ("ad"|"cb"):
      //
      //   if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache,
      //                                               grammar.maxk))
      //     tool.warning(
      //       "nongreedy block may exit incorrectly due",
      //       "\tto limitations of linear approximate lookahead (first k-1 sets",
      //       "\tin lookahead not singleton).",
      //       grammar.getFilename(), blk.getLine(), blk.getColumn());
      // ---------------------------------------------------------
      else if (not pBlk.Greedy) and (be = nil) then

      // ---------------------------------------------------------
      // No choice but to generate a warning
      // ---------------------------------------------------------
      else
      begin
        sets := TInterfaceList.Create;
        try
          for l:= 1 to fGrammar.MaxK do
            sets.Add( r[l]);
          fTool.WarnAltExitAmbiguity( fgrammar,
                                      pBlk,
                                      fLexical,
                                      fGrammar.MaxK,
                                      sets,
                                      i);
        finally
          // Release the list even if the reporting call raises.
          FreeAndNil(sets);
        end;
      end;
    end
    else
    begin
      // k is the depth at which alternative i and the exit branch were
      // told apart; keep the largest depth needed.
      if alt.LookaheadDepth < k then
        alt.LookaheadDepth := k;
      if pBlk.ExitDepth < k then
        pBlk.ExitDepth := k;
    end;
  end;
end;

// ============================================================================
// ============================================================================
// FOLLOW
//
// Compute the lookahead set of whatever follows references to
// the rule associated with the FOLLOW block.
//
// Results are cached in pEnd.Cache[k]. While a computation for depth k is in
// progress pEnd.Lock[k] is set; a re-entrant request then returns a lookahead
// that only carries a cycle marker naming the rule. The lock is released
// even when a nested Look raises.
// ============================================================================
function TLLkAnalyzer.FOLLOW(k: integer; pEnd: IRuleEndElem): ILookahead;
var
  ts   : ITokenSymbol;
  rb   : IRuleBlock;
  rs   : IRuleSymbol;
  re   : IRuleEndElem;
  rr   : IRuleRefElem;
  lg   : ILexerGrammar;
  rule : AnsiString;
  i    : integer;
  q    : ILookahead;
begin
  // ---------------------------------------------------------------
  // What rule are we trying to compute FOLLOW of?
  // ---------------------------------------------------------------
  pEnd.Block.QueryInterface(IRuleBlock, rb);
  if fLexical then
    rule := TCodeGenerator.encodeLexerRuleName( rb.RuleName)
  else
    rule := rb.RuleName;

  // ---------------------------------------------------------------
  // Are we in the midst of computing this FOLLOW already.
  // ---------------------------------------------------------------
  if pEnd.Lock[k] then
  begin
    result := TLookahead.Create( rule);
    exit;
  end;

  // ---------------------------------------------------------------
  // Check to see if there is cached value.
  // ---------------------------------------------------------------
  if pEnd.Cache[k] <> nil then
  begin
    // ------------------------------------------------------------
    // If the cache is a complete computation then simply return it
    // ------------------------------------------------------------
    if pEnd.Cache[k].Cycle = '' then
    begin
      result := pEnd.Cache[k].clone;
      exit;
    end;

    // ------------------------------------------------------------
    // A cache entry exists, but it is a reference to a cyclic com-
    // putation.
    // ------------------------------------------------------------
    ts := fGrammar.Symbol[pEnd.Cache[k].Cycle];
    ts.QueryInterface( IRuleSymbol, rs);
    re := rs.Block.EndElem;

    // ------------------------------------------------------------
    // The other entry may not exist because it is still being
    // computed when this cycle cache entry was found here.
    // ------------------------------------------------------------
    if re.Cache[k] = nil then
    begin
      // ---------------------------------------------------------
      // return the cycle...that's all we can do at the moment.
      // ---------------------------------------------------------
      result := pEnd.Cache[k].clone;
      exit;
    end
    else
    begin
      // ---------------------------------------------------------
      // Replace this cache entry with the entry from the
      // referenced computation. Eventually, this percolates a
      // complete (no cycle reference) cache entry to this node
      // (or at least gets it closer and closer). This is not
      // crucial, but makes cache lookup faster as we might have
      // to look up lots of cycle references before finding a
      // complete reference.
      // ---------------------------------------------------------
      pEnd.Cache[k] := re.Cache[k].clone;
      result        := re.Cache[k].clone;
      exit;
    end;
  end;

  pEnd.Lock[k] := true;
  try
    result := TLookahead.Create;
    ts := fGrammar.Symbol[rule];
    ts.QueryInterface( IRuleSymbol, rs);

    // ---------------------------------------------------------------
    // Walk list of references to this rule to compute FOLLOW
    // ---------------------------------------------------------------
    for i:=0 to rs.ReferenceCount-1 do
    begin
      rr := rs.Reference[i];
      q  := rr.Next.Look(k);

      // ------------------------------------------------------------
      // If there is a cycle then if the cycle is to the rule for
      // this end block, you have a cycle to yourself. Remove the
      // cycle indication--the lookahead is complete.
      // ------------------------------------------------------------
      if q.Cycle = rule then
        q.Cycle := '';

      // ------------------------------------------------------------
      // Add the lookahead into the current FOLLOW computation set.
      // ------------------------------------------------------------
      result.CombineWith( q);
    end;
  finally
    // Never leave the end node locked: a stale lock would make every later
    // FOLLOW request for this depth return only the cycle marker.
    pEnd.Lock[k] := false;
  end;

  // ---------------------------------------------------------------
  // If no rules follow this, it can be a start symbol or called by
  // a start symbol. Set the follow to be end of file.
  // ---------------------------------------------------------------
  // if result.IsNil and (result.Cycle = '') then
  if (result.LaSet = []) and (result.Cycle = '') then
  begin
    // ------------------------------------------------------------
    // Lexical grammars use Epsilon to indicate that the end of rule
    // has been hit. EOF would be misleading; any character can
    // follow a token rule not just EOF as in a grammar (where a
    // start symbol is followed by EOF). There is no sequence info
    // in a lexer between tokens to indicate what is the last token
    // to be seen.
    // ------------------------------------------------------------
    if fGrammar.QueryInterface(ILexerGrammar, lg) = S_OK then
      result.HasEpsilon := true
    else
      result.LaSet := result.LaSet + [TT_EOF]
  end;

  // ---------------------------------------------------------------
  // Cache the result of the FOLLOW computation.
  // ---------------------------------------------------------------
  pEnd.Cache[k] := result.clone;
end;

// ============================================================================
// ============================================================================
// getAltLookahead
//
// Returns a clone of the depth-pK lookahead of alternative pAlt of pBlk. The
// lookahead is computed from the head element of the alternative on first
// use and stored in the cache of the alternative.
// ============================================================================
function TLLkAnalyzer.getAltLookahead( pBlk : IAlternativeBlock;
                                       pAlt : integer;
                                       pK   : integer): ILookahead;
var
  alt  : IAlternative;
  elem : IAlternativeElem;
begin
  alt  := pBlk.Alternative[pAlt];
  elem := alt.Head;
  if alt.Cache[pK] = nil then
    alt.Cache[pK] := elem.Look( pK);
  result := alt.Cache[pK].clone;
end;

// ============================================================================
// ============================================================================
// Look (Action)
//
// The lookahead of an action element is the lookahead of the element that
// follows it.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IActionElem): ILookahead;
begin
  result := pElem.Next.Look(k);
end;

// ============================================================================
// ============================================================================
// Look (AlternativeBlock)
//
// Combines the depth-k lookahead of the head of every alternative. For an
// inverted block (IsNot) at k=1 that SubRuleCanBeInverted accepts, the set is
// complemented against the character vocabulary (lexer) or the user token
// range (parser). fCurrentBlock is restored on exit, including when an
// exception propagates.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IAlternativeBlock): ILookahead;
var
  save : IAlternativeBlock;
  elem : IAlternativeElem;
  alt  : IAlternative;
  i    : integer;
begin
  save          := fCurrentBlock;
  fCurrentBlock := pBlk;
  try
    result := TLookahead.Create;
    for i:=0 to pBlk.Alternatives.Count -1 do
    begin
      fCurrentBlock.AnalyzisAlt := i;
      alt  := pBlk.Alternative[i];
      elem := alt.Head;
      result.CombineWith( elem.Look(k));
    end;

    if (k=1) and pBlk.IsNot and SubRuleCanBeInverted( pBlk, fLexical) then
    begin
      if fLexical then
        result.LaSet := fGrammar.CharVocabulary - result.LaSet
      else
        result.LaSet := [TT_USER..fGrammar.TokenManager.MaxTokenType] - result.LaSet;
    end;
  finally
    fCurrentBlock := save;
  end;
end;

// ============================================================================
// Look (BlockEnd)
//
// Compute what follows this place-holder node and possibly what begins the
// associated loop unless the node is locked.
//
// If we hit the end of a loop, we have to include what tokens can begin the
// loop as well. If the start node is locked, then we simply found an empty
// path through this subrule while analyzing it. If the start node is not
// locked, then this node was hit during a FOLLOW operation and the FIRST of
// this block must be included in that lookahead computation.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pEnd: IBlockEndElem): ILookahead;
var
  zom : IZeroOrMoreBlock;
  oom : IOneOrMoreBlock;
  spb : ISynPredBlock;
begin
  // ---------------------------------------------------------------
  // computation in progress => the tokens we would have computed
  // (had we not been locked) will be included in the set by that
  // computation with the lock on this node.
  // ---------------------------------------------------------------
  if pEnd.Lock[k] then
  begin
    result := TLookahead.Create;
    exit;
  end;

  // ---------------------------------------------------------------
  // Hitting the end of loop means you can see what begins the loop
  // ---------------------------------------------------------------
  pEnd.Block.QueryInterface( IZeroOrMoreBlock, zom);
  pEnd.Block.QueryInterface( IOneOrMoreBlock, oom);
  if (zom <> nil) or (oom <> nil) then
  begin
    // ------------------------------------------------------------
    // Compute what can start the block, but lock end-node so
    // we don't do it twice in the same computation.
    // ------------------------------------------------------------
    pEnd.Lock[k] := true;
    try
      result := Look( k, pEnd.Block);
    finally
      pEnd.Lock[k] := false;
    end;
  end
  else
    result := TLookahead.Create;

  // ---------------------------------------------------------------
  // Syntactic predicates such as ( (A)? )=> have no follow per
  // se. We cannot accurately say what would be matched following
  // a syntactic predicate (you MIGHT be ok if you said it was
  // whatever followed the alternative predicted by the predicate).
  // Hence, (like end-of-token) we return Epsilon to indicate
  // "unknown lookahead."
  //
  // The test is made on the block that owns this end node (as for
  // the loop tests above), not on the end node itself: the previous
  // code queried pEnd, so the epsilon branch was presumably never
  // taken.
  // ---------------------------------------------------------------
  if pEnd.Block.QueryInterface( ISynPredBlock, spb) = S_OK then
    result.HasEpsilon := true

  // ------------------------------------------------------------
  // Compute what can follow the block
  // ------------------------------------------------------------
  else
    result.CombineWith( pEnd.Block.Next.Look(k));
end;

// ============================================================================
// ============================================================================
// Look (CharLiteral)
//
// Return this AnsiChar as the lookahead if k=1.
// ### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
//
// When the atom is inverted (~'c'), the result is the grammar's character
// vocabulary with this character removed. Characters already claimed by
// competing prediction sets are removed as well (see
// removeCompetingPredictionSets), which avoids spurious ambiguity messages
// and matches what the grammar author means.
//
// NOTE: we do NOT include exit path in the exclusion set. E.g.,
//      ( 'a' | ~'a' )* 'b'
// should exit upon seeing a 'b' during the loop.
//
// Character literals are only valid in lexer grammars; anywhere else the tool
// panics and nil is returned.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pAtom: ICharLiteralElem): ILookahead;
var
  candidates: TByteSet;
begin
  // ---------------------------------------------------------------
  // Parser/treewalker grammars must never contain a character
  // literal reference; GrammarMaker should have prevented this.
  // ---------------------------------------------------------------
  if not fLexical then
  begin
    fTool.Panic('Character literal reference found in parser');
    result := nil;
    exit;
  end;

  // ---------------------------------------------------------------
  // Deeper lookahead: this atom consumes one symbol, continue with
  // the next element at depth k-1.
  // ---------------------------------------------------------------
  if k > 1 then
  begin
    result := pAtom.Next.Look(k-1);
    exit;
  end;

  // ---------------------------------------------------------------
  // Plain literal, e.g. 'c': the lookahead is just that character.
  // ---------------------------------------------------------------
  if not pAtom.IsNot then
  begin
    result := TLookahead.Create( pAtom.TokenType);
    exit;
  end;

  // ---------------------------------------------------------------
  // Inverted literal, e.g. ~'c': start from the whole vocabulary,
  // drop competing prediction sets, then drop the literal itself.
  // ---------------------------------------------------------------
  candidates := fGrammar.CharVocabulary;
  removeCompetingPredictionSets( candidates, pAtom);
  candidates := candidates - [pAtom.TokenType];
  result := TLookahead.Create(candidates);
end;

// ============================================================================
// ============================================================================
// Look (CharRange)
//
// At k=1 the lookahead is the set of all character codes from BeginChar to
// EndChar inclusive; for deeper lookahead the range consumes one symbol and
// the computation continues with the next element.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: ICharRangeElem): ILookahead;
begin
  if k <= 1 then
    result := TLookahead.Create([ord(pElem.BeginChar)..ord(pElem.EndChar)])
  else
    result := pElem.Next.Look(k-1);
end;

// ============================================================================
// ============================================================================
// Look (GrammarAtom)
//
// Token reference in a parser/treewalker grammar. At k=1 the lookahead is the
// token type itself or, for an inverted reference (~INTEGER), the user token
// range minus competing prediction sets minus the token. Token references are
// not valid in lexer grammars; there the tool panics and nil is returned.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pAtom: IGrammarAtom): ILookahead;
var
  candidates: TByteSet;
begin
  // ---------------------------------------------------------------
  // Lexer grammars must never contain a token reference;
  // GrammarMaker should have prevented this.
  // ---------------------------------------------------------------
  if fLexical then
  begin
    fTool.Panic('Token reference found in lexer.');
    result := nil;
    exit;
  end;

  // ---------------------------------------------------------------
  // Deeper lookahead: this atom consumes one symbol, continue with
  // the next element at depth k-1.
  // ---------------------------------------------------------------
  if k > 1 then
  begin
    result := pAtom.Next.Look( k-1);
    exit;
  end;

  // ---------------------------------------------------------------
  // Plain token reference, e.g. HEXINT.
  // ---------------------------------------------------------------
  if not pAtom.IsNot then
  begin
    result := TLookahead.Create( pAtom.TokenType);
    exit;
  end;

  // ---------------------------------------------------------------
  // Inverted token reference, e.g. ~INTEGER: start from all user
  // token types, drop competing prediction sets, then drop the
  // referenced token itself.
  // ---------------------------------------------------------------
  candidates := [TT_USER..fGrammar.TokenManager.MaxTokenType];
  removeCompetingPredictionSets( candidates, pAtom);
  candidates := candidates - [pAtom.TokenType];
  result := TLookahead.Create(candidates);
end;

// ============================================================================
// ============================================================================
// Look (...)+
//
// The lookahead of a (...)+ block is the combined lookahead of all alternatives
// and, if an empty path is found, the lookahead of what follows the block.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IOneOrMoreBlock): ILookahead;
var
  asAlternatives : IAlternativeBlock;
begin
  // A (...)+ block is analyzed exactly like a plain alternative block.
  asAlternatives := pBlk as IAlternativeBlock;
  result := Look( k, asAlternatives);
end;

// ================================================================================================
// Look (...)@(n,m)
//
// Lookahead of a block with repetition bounds Low..High:
//   * k <= Low          : lookahead of the alternatives only;
//   * Low < k <= High   : lookahead of the alternatives, combined with the
//                         lookahead of what follows the block at depths
//                         k-Low down to 1;
//   * k > High          : only the lookahead of what follows the block at
//                         depths k-Low down to 1.
// ================================================================================================
function TLLkAnalyzer.Look( k: integer; pBlk: INMBlock): ILookahead;
var
  depth : integer;
begin
  // The alternatives contribute while k is within either bound.
  if (k <= pBlk.Low) or (k <= pBlk.High) then
    result := look(k, pBlk as IAlternativeBlock)
  else
    result := TLookahead.Create;

  // Beyond the mandatory repetitions, what follows the block becomes
  // visible as well.
  if k > pBlk.Low then
  begin
    for depth := k - pBlk.Low downto 1 do
      result.CombineWith( pBlk.Next.Look(depth));
  end;
end;

// ============================================================================
// ============================================================================
// Look (...)*
//
// A (...)* loop may be skipped entirely, so its lookahead is the lookahead of
// its alternatives combined with the lookahead of whatever follows the loop.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: IZeroOrMoreBlock): ILookahead;
var
  followSet : ILookahead;
begin
  result    := look( k, pBlk as IAlternativeBlock);
  followSet := pBlk.Next.Look(k);
  result.CombineWith( followSet);
end;

// ============================================================================
// ============================================================================
// Look (RuleBlock)
//
// Combine the lookahead computed for each alternative. Lock the node so that
// no other computation may come back on itself -- infinite loop. This also
// implies infinite left-recursion in the grammar
// (or an error in this algorithm ;)).
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IRuleBlock): ILookahead;
begin
   // A rule block is analysed through the IAlternativeBlock overload.
   result := Look( k, pElem as IAlternativeBlock);
end;

// ============================================================================
// ============================================================================
// Look (RuleEnd)
//
// Lexical rules never compute follow.  They set epsilon and the code generator
// generates code to check for any character.  The code generator must remove
// the tokens used to predict any previous alts in the same block.
//
// When the last node of a rule is reached and noFOLLOW, it implies that a
// "local" FOLLOW will be computed after this call.  I.e.,
//
//    a : b A;
//    b : B | ;
//    c : b C;
//
// Here, when computing the look of rule b from rule a, we want only
// {B,EPSILON_TYPE} so that look(b A) will be {B,A} not {B,A,C}.
//
// If the end block is not locked and the FOLLOW is wanted, the algorithm must
// compute the lookahead of what follows references to this rule.  If end block
// is locked, FOLLOW will return an empty set with a cycle to the rule
// associated with this end block.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pEnd: IRuleEndElem): ILookahead;
begin
   // ---------------------------------------------------------------
   // FOLLOW is wanted: parser grammar and noFOLLOW is not set
   // ---------------------------------------------------------------
   if not (pEnd.noFOLLOW or fLexical) then
   begin
      result := FOLLOW( k, pEnd);
      exit;
   end;

   // ---------------------------------------------------------------
   // Otherwise only record that epsilon was seen at depth k
   // ---------------------------------------------------------------
   result            := TLookahead.Create;
   result.HasEpsilon := true;
   result.Epsilon    := [k];
end;

// ============================================================================
// ============================================================================
// Look (RuleRef)
//
// When computing ruleref lookahead, we don't want the FOLLOW computation done
// if an empty path exists for the rule.  The FOLLOW is too loose of a set...
// we want only to include the "local" FOLLOW or what can follow this
// particular ref to the node.
// In other words, we use context information to
// reduce the complexity of the analysis and strengthen the parser.
//
// The noFOLLOW flag is used as a means of restricting the FOLLOW to a
// "local" FOLLOW.  This variable is orthogonal to the 'lock' variable that
// prevents infinite recursion.  noFOLLOW does not care about what k is.
//
// Parameters:
//    k     - lookahead depth to compute
//    pElem - the rule reference being analysed
//
// Returns the lookahead of the referenced rule, with every epsilon depth
// replaced by the lookahead of what follows this reference.  When the target
// rule is unknown or not defined, MSG_E_RULENOTDEFINED is reported and an
// empty lookahead is returned.  A cycle reported by the nested computation is
// flagged with MSG_E_INFRECURSION; the analysis then carries on.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IRuleRefElem): ILookahead;
var
   ts     : ITokenSymbol;
   rs     : IRuleSymbol;
   re     : IRuleEndElem;
   se     : boolean;
   p      : ILookahead;
   depths : TByteSet;
   i      : integer;

   // Rule name as it is shown in messages: lexer rule names are decoded,
   // parser rule names are used unchanged.
   function displayName( const pRule : AnsiString): AnsiString;
   begin
      if fLexical then
         result := TCodeGenerator.decodeLexerRuleName( pRule)
      else
         result := pRule;
   end;

begin
   rs := nil;
   ts := fGrammar.Symbol[pElem.TargetRule];
   if ts <> nil then
      ts.QueryInterface( IRuleSymbol, rs);

   // ---------------------------------------------------------------
   // The symbol does not exist in the grammar, or it exists but has
   // no definition.  Both cases are reported the same way.
   // ---------------------------------------------------------------
   if (rs = nil) or (not rs.Defined) then
   begin
      fTool.Error( Format( MSG_E_RULENOTDEFINED,
                           [displayName( pElem.TargetRule)]),
                   fGrammar.GrammarFile,
                   pElem.Line,
                   pElem.Column);
      result := TLookahead.Create;
      exit;
   end;

   // ---------------------------------------------------------------
   // Go off the rule and get the lookahead (w/o FOLLOW).  The state
   // of the end block is restored even if the nested computation
   // raises, so the shared node never keeps a stale noFOLLOW flag.
   // ---------------------------------------------------------------
   re          := rs.Block.EndElem;
   se          := re.noFOLLOW;
   re.noFOLLOW := true;
   try
      p := Look( k, pElem.TargetRule);
   finally
      re.noFOLLOW := se;
   end;

   // ---------------------------------------------------------------
   // Check for infinite recursion.  If a cycle is returned: trouble!!
   // ---------------------------------------------------------------
   if p.Cycle <> '' then
   begin
      fTool.Error( Format( MSG_E_INFRECURSION,
                           [displayName( p.Cycle),
                            displayName( pElem.TargetRule)]),
                   fGrammar.GrammarFile,
                   pElem.Line,
                   pElem.Column);
   end;

   // ---------------------------------------------------------------
   // Is the local FOLLOW required?
   // ---------------------------------------------------------------
   if p.HasEpsilon then
   begin
      // ------------------------------------------------------------
      // Remove Epsilon
      // ------------------------------------------------------------
      p.HasEpsilon := false;

      // ------------------------------------------------------------
      // For each lookahead depth that saw epsilon, merge the look-
      // ahead of what follows this reference at that same depth.
      //
      // Note: any of these look() computations for local follow can
      //       set EPSILON in the set again if the end of this rule
      //       is found.
      // ------------------------------------------------------------
      depths    := p.Epsilon;
      p.Epsilon := [];
      for i := 0 to 255 do
      begin
         if i in depths then
            p.CombineWith( pElem.Next.Look( i));
      end;
   end;

   result := p;
end;

// ============================================================================
// ============================================================================
// Look (StringLiteral)
//
// Lexer grammars: the k-th character of the processed literal text, or the
// lookahead of the following element once k runs past the end of the text.
// Parser grammars: the literal is treated as a single token at depth 1
// (inverted against [TT_USER..MaxTokenType] when IsNot is set); deeper
// requests are passed on to the following element.
// ============================================================================
function TLLkAnalyzer.Look( k     : integer;
                            pAtom : IStringLiteralElem): ILookahead;
var
   len : integer;
begin
   // ---------------------------------------------------------------
   // Create Lookahead for lexer grammar
   // ---------------------------------------------------------------
   if fLexical then
   begin
      len := Length( pAtom.ProcessedAtomText);
      if k > len then
         result := pAtom.Next.Look( k - len)
      else
         result := TLookahead.Create( ord( pAtom.ProcessedAtomText[k]));
   end

   // ---------------------------------------------------------------
   // Create Lookahead for non-lexer grammar
   // ---------------------------------------------------------------
   else if k > 1 then
      result := pAtom.Next.Look( k - 1)
   else if pAtom.IsNot then
      result := TLookahead.Create( [TT_USER..fGrammar.TokenManager.MaxTokenType]
                                   - [pAtom.TokenType])
   else
      result := TLookahead.Create( pAtom.TokenType);
end;

// ============================================================================
// ============================================================================
// Look (...)=>
//
// The lookahead of a (...)=> block is the lookahead of what follows the block.
// By definition, the syntactic predicate block defies static analysis (you
// want to try it out at run-time).
// The LOOK of (a)=>A B is A for LL(1)
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pBlk: ISynPredBlock): ILookahead;
begin
   // The predicate block itself contributes nothing; only what follows counts.
   result := pBlk.Next.Look( k);
end;

// ============================================================================
// ============================================================================
// Look (TT_XXX .. TT_YYY)
//
// At depth 1 the lookahead is the whole token range; deeper requests are
// handed to the following element with the depth reduced by one.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: ITokenRangeElem): ILookahead;
begin
   if k <= 1 then
      result := TLookahead.Create( [pElem.BeginToken..pElem.EndToken])
   else
      result := pElem.Next.Look( k - 1);
end;

// ============================================================================
// ============================================================================
// Look (.)
//
// At depth 1 the wildcard starts from the complete vocabulary (characters for
// a lexer grammar, user token types otherwise), reduced by
// removeCompetingPredictionSets.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: IWildCardElem): ILookahead;
var
   vocab : TByteSet;
begin
   // ---------------------------------------------------------------
   // Skip until analysis hits k=1
   // ---------------------------------------------------------------
   if k > 1 then
   begin
      result := pElem.Next.Look( k - 1);
      exit;
   end;

   // ---------------------------------------------------------------
   // Start from everything the grammar can see at this position
   // ---------------------------------------------------------------
   if fLexical then
      vocab := fGrammar.CharVocabulary
   else
      vocab := [TT_USER..fGrammar.TokenManager.MaxTokenType];

   { TODO : look(wildcard) delete 'removeCompeting...' if don't needed }
   removeCompetingPredictionSets( vocab, pElem);

   result := TLookahead.Create( vocab);
end;

// ============================================================================
// ============================================================================
// Look (rule name)
//
// Compute the combined lookahead for all productions of a rule.  If the
// lookahead returns with epsilon, at least one epsilon path exists (one that
// consumes no tokens).  The noFOLLOW flag being set for this endruleblk,
// indicates that a rule ref invoked this rule.
//
// Currently only look(RuleRef) calls this.  There is no need for the code
// generator to call this.
//
// Parameters:
//    k     - lookahead depth to compute
//    pElem - name of the rule; it is looked up in the grammar and must refer
//            to a rule symbol, because the symbol and its block are
//            dereferenced without a nil check.
//
// Returns a lookahead carrying only the rule name as cycle marker when the
// rule block is already locked for depth k; otherwise a clone of the
// lookahead cached on the rule block for depth k, computed on first use.
// ============================================================================
function TLLkAnalyzer.Look(k: integer; pElem: AnsiString): ILookahead;
var
   ts : ITokenSymbol;
   rs : IRuleSymbol;
   rb : IRuleBlock;
begin
   ts := fGrammar.Symbol[pElem];
   ts.QueryInterface( IRuleSymbol, rs);
   rb := rs.Block;

   // ---------------------------------------------------------------
   // Check for infinite recursion
   // ---------------------------------------------------------------
   if rb.Lock[k] then
   begin
      result := TLookahead.Create( pElem);
      exit;
   end;

   // ---------------------------------------------------------------
   // Well, the lookahead wasn't computed before, so do it.  The lock
   // is released even if the computation raises; a lock left behind
   // would make every later look-up of this rule report a cycle.
   // ---------------------------------------------------------------
   if rb.Cache[k] = nil then
   begin
      rb.Lock[k] := true;
      try
         rb.Cache[k] := Look( k, rb as IRuleBlock);
      finally
         rb.Lock[k] := false;
      end;
   end;

   result := rb.Cache[k].clone;
end;

// ----------------------------------------------------------------------------
// Look (tree)
//
// Depths beyond 1 are handed to the following element.  Depth 1 is not
// implemented yet and yields nil.
// ----------------------------------------------------------------------------
function TLLkAnalyzer.Look(k: integer; pElem: ITreeElem): ILookahead;
begin
   { TODO : look(tree) not implemented }
   if k > 1 then
      result := pElem.Next.Look( k - 1)
   else
      result := nil;
end;

// ============================================================================
// ============================================================================
// SetGrammar
//
// Stores the grammar to analyse and records whether it is a lexer grammar,
// i.e. whether it supports ILexerGrammar.
// ============================================================================
procedure TLLkAnalyzer.SetGrammar(pGrammar: IGrammar);
var
   lg : ILexerGrammar;
begin
   fGrammar := pGrammar;
   fLexical := (fGrammar.QueryInterface( ILexerGrammar, lg) = S_OK);
end;

// ============================================================================
// ============================================================================
// altUsesWildcardDefault
//
// Return true if someone used the '.' wildcard default idiom, which means the
// alternative has only two elems: wildcard-elem followed by block-end-elem.
// An alternative without a head element, or whose head has no successor,
// yields false.
// ============================================================================
function TLLkAnalyzer.altUsesWildcardDefault( pAlt: IAlternative): boolean;
var
   head : IAlternativeElem;
   wc   : IWildcardElem;
   be   : IBlockEndElem;
begin
   result := false;
   wc     := nil;
   be     := nil;

   head := pAlt.Head;
   if head = nil then
      exit;

   // The first element must be the wildcard ...
   head.QueryInterface( IWildcardElem, wc);
   if (wc = nil) or (head.Next = nil) then
      exit;

   // ... and it must be directly followed by the end of the block.
   head.Next.QueryInterface( IBlockEndElem, be);
   result := (be <> nil);
end;

// ============================================================================
// ============================================================================
// subRuleCanBeInverted
//
// Returns true when pBlock is a plain subrule whose alternatives each consist
// of exactly one simple element (char literal, char range, token reference,
// token range or -- for lexers only -- string literal) followed by the block
// end.  pIsLexer tells whether string literals count as simple elements.
// ============================================================================
function TLLkAnalyzer.subRuleCanBeInverted( pBlock   : IAlternativeBlock;
                                            pIsLexer : boolean): boolean;
var
   zom    : IZeroOrMoreBlock;
   oom    : IOneOrMoreBlock;
   spb    : ISynPredBlock;
   i      : integer;
   alt    : IAlternative;
   elt    : IAlternativeElem;
   cLit   : ICharLiteralElem;
   cRng   : ICharRangeELem;
   tRef   : ITokenRefElem;
   tRng   : ITokenRangeElem;
   sLit   : IStringLiteralElem;
   be     : IBlockEndElem;
   simple : boolean;
begin
   result := false;

   // ---------------------------------------------------------------
   // Cannot invert (...)*, (...)+, (...)=>
   // ---------------------------------------------------------------
   pBlock.QueryInterface( IZeroOrMoreBlock, zom);
   pBlock.QueryInterface( IOneOrMoreBlock,  oom);
   pBlock.QueryInterface( ISynPredBlock,    spb);
   if (zom <> nil) or (oom <> nil) or (spb <> nil) then
      exit;

   // ---------------------------------------------------------------
   // Cannot invert an empty subrule
   // ---------------------------------------------------------------
   if pBlock.Alternatives.Count = 0 then
      exit;

   // ---------------------------------------------------------------
   // Cannot invert a block with an auto-generation type.  The test
   // does not depend on the alternative, so it is done once here.
   // NOTE(review): this is a property of the block, not of the
   // element -- confirm that a per-element check was not intended.
   // ---------------------------------------------------------------
   if pBlock.AutoGenType <> AUTOGEN_NONE then
      exit;

   // ---------------------------------------------------------------
   // The block must only contain alternatives with a single element,
   // where each element is a char, token, char range or token range.
   // ---------------------------------------------------------------
   for i := 0 to pBlock.Alternatives.Count - 1 do
   begin
      alt := pBlock.Alternative[i];

      // ------------------------------------------------------------
      // Cannot have anything interesting in the alternative ...
      // ------------------------------------------------------------
      if (alt.SynPred <> nil) or
         (alt.SemPred <> '') or
         (alt.ExHandlerType <> '') then
         exit;

      // ------------------------------------------------------------
      // ... and there must be one simple element.  A missing head or
      // a head without successor cannot satisfy that.
      // ------------------------------------------------------------
      elt := alt.Head;
      if (elt = nil) or (elt.Next = nil) then
         exit;

      elt.QueryInterface( ICharLiteralElem,   cLit);
      elt.QueryInterface( ICharRangeElem,     cRng);
      elt.QueryInterface( ITokenRefElem,      tRef);
      elt.QueryInterface( ITokenRangeElem,    tRng);
      elt.QueryInterface( IStringLiteralElem, sLit);
      elt.Next.QueryInterface( IBlockEndElem, be);

      simple := (cLit <> nil) or
                (cRng <> nil) or
                (tRef <> nil) or
                (tRng <> nil) or
                ((sLit <> nil) and pIsLexer);

      if (not simple) or (be = nil) then
         exit;
   end;

   result := true;
end;

// ============================================================================
// ============================================================================
// removeCompetingPredictionSets
//
// Remove the prediction sets from preceding alternatives, but *only* if this
// element is the first element of the alternative.  The class members
// 'fCurrentBlock' and 'fCurrentBlock.AnalysisAlt' must be set correctly.
// ============================================================================
procedure TLLkAnalyzer.removeCompetingPredictionSets(
             var pSet  : TByteSet;
                 pElem : IAlternativeElem);
var
   altIdx   : integer;
   first    : IGrammarElem;
   prevHead : IAlternativeElem;
begin
   // ---------------------------------------------------------------
   // Nothing to do unless pElem starts the alternative that is being
   // analysed: the subtraction below implicitly assumes k==1.
   // ---------------------------------------------------------------
   first := fCurrentBlock.Alternative[fCurrentBlock.AnalyzisAlt].Head;
   if pElem <> first then
      exit;

   // ---------------------------------------------------------------
   // Take away the depth-1 lookahead of every preceding alternative
   // ---------------------------------------------------------------
   for altIdx := 0 to fCurrentBlock.AnalyzisAlt - 1 do
   begin
      prevHead := fCurrentBlock.Alternative[altIdx].Head;
      pSet     := pSet - prevHead.Look( 1).LaSet;
   end;
end;

end.