using System; using System.IO; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading.Tasks; using System.Threading; using miew.Debugging; using miew.Enumerable; using miew.String; using miew.Unsafe; using miew.Tokenization; using miew.ReadOnly; using miew.Concurrency; namespace agree { using unif_stats = ParseControl.Stats._unification; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public class ParseChart : SubscriptionChart { public ParseChart(ParseControl ctrl) : base(ctrl) { ctrl.chart = this; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Generate new activities for the specified IParseObj /// Try to unify the given chart edge against the KEY position of each compatible rule. If successful, proceed as /// follows: /// - Unary rules are now finished up immediately here, creating a passive edge (without creating an ActiveEdge) /// and entering it into the parse chart. /// - Otherwise, an ActiveEdge that expands leftwards or rightwards is created, as appropriate. Active edges are /// always seeded with a positioned passive edge, but note that neither this partial passive edge (nor the /// ActiveEdge) are ever entered into the parse chart. Instead, this function uses a subscription mechanism /// to register the active edge's interest in a particular chart position. This subscription is what keeps the /// ActiveEdge (object) alive. /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// protected override void GenerateActiveEdges(IParseObj ce) { if (!ce.IsActive()) return; /// this being a new task, we start a new set of statistics which will be aggregated later, in order /// to avoid syncrhonization expenses unif_stats u_stats = new unif_stats(ParseControl.Stats.Visibility.Private); Tfs new_tfs = ce.Tfs; int i_start = ce.TokenSpan.StartIndex; int i_end = ce.TokenSpan.EndIndex; bool f_entire_span = ce.TokenSpan.Equals(EntireSpan); bool f_packing = ctrl.parser_config.packing != Config.Parser.PackingOpts.None; /// Rule pre-check filter "A Bag of Useful Technique for Efficient and Robust Parsing" (Kiefer et al. 1999) IEnumerable<GrammarRule> rules_to_use; Rule new_daughter = ce.License as Rule; if (new_daughter != null) rules_to_use = new_daughter.CompatibleKeyMothers.OfType<GrammarRule>(); else rules_to_use = ctrl.g._grammar_rules; PassiveEdge.ParseObjs po = new PassiveEdge.ParseObjs(ce); foreach (GrammarRule r in rules_to_use) { TfsSection[] rd = r.RuleDaughters; TfsSection daughter; int i_key = 0; if (rd.Length == 1) { daughter = rd[0]; Debug.Assert(!r.IsSpanOnly); } else { i_key = r.KeyDaughterIndex; // if the edge will not fit, don't bother creating the active edge. if (i_start < i_key) continue; if (i_end > ColumnCount - (rd.Length - i_key)) continue; /// We only have to try out one of the rule's ARGS positions and it doesn't matter which one it is, /// as long as that choice is consistently used for that rule. This allows us to choose the KEY arg /// without the risk of doing double work. Usage of this new edge in the other alignments may be /// handled later, when/if the rule succeeds in a neighboring chart position. if (i_key == 0) { /// Since active edges only grow in one direction, there's no point in creating a span-only /// active edge which does not abut the beginning (or end) of the chart. if (r.IsSpanOnly && i_start > 0) continue; daughter = rd[0]; } else { if (r.IsSpanOnly && i_end < ColumnCount - 1) continue; daughter = rd[rd.Length - 1]; } } /// Quick-check: /// Robert Malouf, John Carroll, Ann Copestake. 2000. Efficient feature structure operations without compliation. /// Natural Language Engineering 6 (1): 29-46. Cambridge: Cambridge University Press /// todo: track which qc rules failed for this rule daughter and propagate to the next rule..? if (ctrl.QuickCheckParse(new_tfs, daughter)) continue; MotherDaughterTfs mdtfs = ctrl.UnifySection( daughter, new_tfs, rd.Length == 1, f_packing, ref u_stats); if (mdtfs != null) { if (rd.Length > 2) Nop.CodeCoverage(); if (rd.Length == 1) FinishMatchedEdge(r, mdtfs, ref po, ref u_stats); else if (i_key == 0) Subscribe(RIGHT, new ActiveRightEdge(this, r, mdtfs, ref po), ref u_stats); else Subscribe(LEFT, new ActiveLeftEdge(this, r, mdtfs, ref po), ref u_stats); } } ctrl.stats.Parsing.Unification.Add(u_stats); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// When a rule has successfully completed building all of its structure /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public unsafe void FinishMatchedEdge(GrammarRule rule, Tfs tfs, ref PassiveEdge.ParseObjs po, ref unif_stats u_stats) { PassiveEdge.Derived nce = null; TextWriter tw = ctrl.config.system.tw_debug; /// try to match a start symbol bool f_entire_span = po.Span.Equals(EntireSpan); if (f_entire_span) { StartSymbol[] rgss = ctrl.g.StartSymbols; bool* rgss_map = stackalloc bool[rgss.Length]; int c_match = 0; int i_max_ix = -1; for (int i_ss = 0; i_ss < rgss.Length; i_ss++) { StartSymbol start_sym = rgss[i_ss]; u_stats.c_attempted++; if (ctrl.UnifyCheck(start_sym.Expanded, tfs)) { u_stats.c_succeeded++; if (tw != null) tw.WriteLineColor("$darkgreen {0}$ matched start symbol $magenta {1}", nce, start_sym); /// If not packing, stop checking start symbols after a first match. #if !CHECK_ALL_SS if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None) { nce = new PassiveEdge.Completed(ctrl, rule, tfs, i_ss, ref po); break; } #endif /// If packing, check all start symbols and create a map of any matches. rgss_map[i_ss] = true; if (i_ss > i_max_ix) i_max_ix = i_ss; c_match++; } } /// If packing, tag the spanning restricted edge as 'Completed' if it matched at least one /// start symbol. Only such edges--and their packed contents--need to be examined in the /// unpacking phase if (c_match == 1) nce = new PassiveEdge.Completed(ctrl, rule, tfs, i_max_ix, ref po); else if (c_match > 1) nce = new PassiveEdge.Completed(ctrl, rule, tfs, Ext.ToArray(rgss_map, i_max_ix + 1), ref po); } nce = nce ?? new PassiveEdge.Derived(ctrl, rule, tfs, ref po); ctrl.StartAttachedChildTask(AddParseObj, nce); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Check for a subsumption relationship between a newly derived edge and any of the existing co-spanning edges. /// Proactive packing occurs when the existing edge subsumes the new edge, or when they subsume each other (which /// means they are identical). Retroactive packing occurs when the new edge subsumes the existing edge. There are /// two other possibilities: that there is no overall subsumption relationship between otherwise compatible edges, /// or that the two are incompatible (do not unify). Fortunately, no packing is indicated in either case, because /// if we had to distinguish between these two cases, the subsumption test would be much less efficient (it would /// have to resolve coreference chains). /// </summary> /// <reference> /// Stephan Oepen, John Carroll. 2000. "Ambiguity Packing in Constraint-based Parsing--Practical Results." /// In Proceedings of the 1st North American chapter of the Association for Computational Linguistics /// conference. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 162-169. /// </reference> /// <remarks> /// derived parents of packed edges automatically become defunct (not 'active'). Since only packed /// edges can be in the 'hold' state, any edge that has a daughter in the 'hold' state can be /// ignored; this is achieved by incrementing a global generation count which /// immediately invalidates all of these states, which are cached, causing any still 'active' chart edge /// to re-evaluate this condition regarding itself, on demand, at such time as it's queried. Now since we /// are in the parsing phase, the fact that we just put an object in the 'hold' state doesn't affect any /// of these parent/daughter results ('hold' does not entail 'defunct') but what does make a difference /// is the transfer of the packed edges from the old edge to the new, as the _daughters_ of the old edge /// may now--or at such future time as they become defunct--cause the deactivation of their altered /// derivation list. /// </remarks> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// protected override bool SubsumptionPacking(IParseObj po) { PassiveEdge new_edge = po as PassiveEdge; if (new_edge == null) return false; int i_seq = new_edge.SequenceId; Config.Parser pcfg = ctrl.parser_config; sbyte opt = (sbyte)pcfg.packing; bool f_exact = (opt & (sbyte)Config.Parser.PackingOpts.Only) > 0; if (f_exact) opt &= (sbyte)~Config.Parser.PackingOpts.Only; bool f_entire_span = new_edge.IsEntireSpan; List<PassiveEdge> deferred_retro = null; for (var e = new ChartCell._enum(this[new_edge.TokenSpan]); e.MoveNext(); ) { PassiveEdge existing = e.Current as PassiveEdge; /// packing of morphology edges is currently not implemented if (existing == null || existing == new_edge) continue; if ((existing.State & (SequenceControl.stb_Hold | SequenceControl.stb_Remove)) > 0) continue; /// Don't permit a spanning edge which didn't match a start symbol (i.e. PassiveEdge.Derived) /// to be packed into one that did (i.e. PassiveEdge.Completed), nor vice-versa. if (f_entire_span && new_edge.GetType() != existing.GetType()) continue; if (!existing.SeqIdBelow(i_seq)) continue; /// evaluate the subsumption relationship between two TFSen sbyte b = new Subsumption(existing, new_edge).Check(); if ((f_exact && b != opt) || (b & opt) == 0) continue; /// proactive packing (the existing edge subsumes or is equivalent to the new edge) if ((b & Subsumption.FirstSubsumesSecond) > 0) { /// ???? DoSkippedItems() ???? if (deferred_retro != null) { // Console.Write("m"); DoSkippedItems(new_edge, deferred_retro); deferred_retro = null; } /// for proactive, this is likely the last thing to do, so we should wait for it if (existing.BeginTransact()) { /// put the new, unpublished edge on HOLD before being added to the packing list, so /// that it can be never witnessed as active in any context //Interlocked.Increment(ref g_dseq); new_edge.SetStateRaw(SequenceControl.HOLD); existing.HoistPackingFrom(new_edge); existing.EndTransact(SequenceControl.ACTIVE); ctrl.stats.Parsing.Packing.RecordEvent(b); return true; } continue; } /// retroactive packing (the new edge subsumes the existing edge) if (b == Subsumption.SecondSubsumesFirst) { int _tmp = existing.TryTransact(); if (_tmp == SequenceControl.ACTIVE) { /// our claim is secure; release lock on 'old' as quickly as possible existing.SetStateRaw(SequenceControl.HOLD); new_edge.HoistPackingFrom(existing); /// see notes above //Interlocked.Increment(ref g_dseq); //existing.EndTransact(SequenceControl.HOLD); ctrl.stats.Parsing.Packing.RecordEvent(b); } else if ((_tmp & SequenceControl.stb_Blocked) > 0) { /// if a retroactive edge is busy, we'll just make a note of it to try again later (deferred_retro = deferred_retro ?? new List<PassiveEdge>(7)).Add(existing); } } } if (deferred_retro != null) DoSkippedItems(new_edge, deferred_retro); return false; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Try to complete the retroactive packing for edges that were blocked earlier /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void DoSkippedItems(PassiveEdge new_edge, List<PassiveEdge> skipped) { int c = skipped.Count; int ix = -1; do { PassiveEdge existing; do if (++ix == skipped.Count) ix = 0; while ((existing = skipped[ix]) == null); one_left: int _tmp = existing.TryTransact(); if (_tmp == SequenceControl.ACTIVE) { existing.SetStateRaw(SequenceControl.HOLD); new_edge.HoistPackingFrom(existing); ctrl.stats.Parsing.Packing.RecordEvent(Subsumption.SecondSubsumesFirst); } else if ((_tmp & SequenceControl.stb_Blocked) > 0) { if (c == 1) { new SpinWait().SpinOnce(); goto one_left; } continue; } skipped[ix] = null; } while (--c > 0); #if false while (skipped.Count > 0) { PassiveEdge existing = skipped[ix = (ix + 1) % skipped.Count]; int _tmp = existing.TryTransact(); if (_tmp == SequenceControl.ACTIVE) { new_edge.HoistPackingFrom(existing); /// see notes above //Interlocked.Increment(ref g_dseq); existing.EndTransact(SequenceControl.HOLD); ctrl.stats.Parsing.Packing.RecordEvent(Subsumption.SecondSubsumesFirst); } else if ((_tmp & SequenceControl.stb_Blocked) > 0) { if (skipped.Count == 1) new SpinWait().SpinOnce(); continue; } skipped.RemoveAt(ix); } #endif } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Exhaustive unpacking of all completed parses in the chart /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public ICollection<IDerivation> AllDerivations { get { ParseControl.Stats stats = ctrl.stats; if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None) { Debug.Assert(stats.Parsing.Chart.c_root_edges == _completed_edges.Count); return new DeferredCollection<IDerivation>( stats.Parsing.Chart.c_root_edges, _completed_edges.Select(po => po.Derivations[0])); } else { ConcurrentList<IDerivation> ild = new ConcurrentList<IDerivation>(); foreach (var po in _completed_edges) { Debug.Assert(po.IsRestricted); if (!po.IsActive()) continue; if (ctrl.TryEnterMultithread()) { Parallel.ForEach(po.Derivations, drv => { if (drv.UnpackedTfs != null) ild.Add(drv); else Interlocked.Increment(ref stats.Parsing.Unpacking.c_rejected_derivations); }); ctrl.LeaveMultithread(); } else foreach (var drv in po.Derivations) { if (drv.UnpackedTfs != null) ild.Add(drv); else stats.Parsing.Unpacking.c_rejected_derivations++; } } stats.Parsing.Unpacking.c_derivations = ild.Count; return ild; } } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Exhaustive unpacking of all completed parses in the chart /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public int DerivationCountOnly { get { ParseControl.Stats stats = ctrl.stats; if (ctrl.parser_config.packing == Config.Parser.PackingOpts.None) { Debug.Assert(stats.Parsing.Chart.c_root_edges == _completed_edges.Count); return stats.Parsing.Chart.c_root_edges; } else { foreach (var po in _completed_edges) { Debug.Assert(po.IsRestricted); if (!po.IsActive()) continue; if (ctrl.TryEnterMultithread()) { Parallel.ForEach(po.Derivations, drv => { if (drv.UnpackedTfs == null) Interlocked.Increment(ref stats.Parsing.Unpacking.c_rejected_derivations); else Interlocked.Increment(ref stats.Parsing.Unpacking.c_derivations); }); ctrl.LeaveMultithread(); } else foreach (var drv in po.Derivations) { if (drv.UnpackedTfs == null) stats.Parsing.Unpacking.c_rejected_derivations++; else stats.Parsing.Unpacking.c_derivations++; } } return stats.Parsing.Unpacking.c_derivations; } } } }; }