//#define TEST //#define DUMP //#define LOG //#define WRITE #define DUM_DUM using System; using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; using System.IO; using System.Linq; using System.Threading; using System.Threading.Tasks; using miew.Debugging; using miew.Enumerable; using miew.Tokenization; namespace agree { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Below are the higher-level parts of ParseControl; those that deal with initializing, starting, and coordinating /// the morphology and parsing jobs. /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public sealed partial class ParseControl : ISysObj, IDisposable { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Task MorphologyJob(TokenSet tok_set) { this.la = new LexicalAnalysis(this, lexicon, tok_set); return la.Analyze(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void PostMorphologyJob() { stats.Morphology.SetComplete(); #if DUM_DUM /// we can start initializing the chart during the dependency filtering var init = StartAttachedChildTask<ParseChart>(() => new ParseChart(this)); #endif VerifyCoverage(); CheckChartDependencies(); #if DUM_DUM /// ...now we need it ParseChart empty_chart = init.Result; #else ParseChart empty_chart = new ParseChart(this); #endif /// adding (any) tokens into the chart immediately triggers asynchronous parsing. Using attached /// child tasks has two benefits: /// (a.) since subtasks are attached to the root task, it's ok for this function to end before /// parsing is complete; /// (b.) we don't have to worry about the parser completing all available submitted work before /// this function has finished submitting all tokens. foreach (IParseObj tok in chart_tokens) StartAttachedChildTask(empty_chart.AddParseObj, tok); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void VerifyCoverage() { var missing = Enumerable.Range(0, la.SourceItem.MinimalSpanCount).Except(la.UnionMany(ta => ta.TokenSpan.Range)); if (missing.Any()) { String msg = Span.FromIndexList(missing) .Select(sp => String.Format("\"{0}\" {1}", ts_input.Source.MinimalSpanText(sp), ts_input.Source.MinimalSpanToCharacterSpan(sp))) // todo: use ts.CharacterSpan .StringJoin(", "); throw new LexicalAnalysisException("No morpholexical solutions for: {0}.", msg); } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void CheckChartDependencies() { int c_columns = la.ChartSize; if (parser_config.ChartDependencyPathsArray == null) chart_tokens = la; else { chart_tokens = ChartDependencyFilter(); var hs = chart_tokens.UnionMany(tok => tok.TokenSpan.Range); if (hs.Count != c_columns) { String msg = Enumerable.Range(0, c_columns).Except(hs).StringJoin(", "); throw new LexicalCoverageException("No tokens were submitted to the parser for the following chart column positions: {0}.", msg); } } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public MotherDaughterTfs UnifySection( TfsSection daughter, Tfs candidate, bool f_args_delete, bool f_restrict, ref Stats._unification u_stats) { MotherDaughterTfs mdtfs; if (parser_config.unifier == Config.Parser.UnifierType.qd) mdtfs = new UnificationQd(this, f_args_delete, f_restrict).UnifySection(daughter, candidate); else if (parser_config.unifier == Config.Parser.UnifierType.n_way) mdtfs = UnificationNWay.UnifySection(this, daughter, candidate, f_args_delete, f_restrict); else mdtfs = Unification.UnifySection(daughter, candidate, f_args_delete, f_restrict, this); u_stats.RecordEvent(mdtfs); return mdtfs; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public MotherDaughterTfs UnifySections( MotherDaughterTfs mother, int i_first_arg, Tfs[] candidates, bool f_args_delete, bool f_restrict, ref Stats._unification u_stats) { MotherDaughterTfs mdtfs; if (parser_config.unifier == Config.Parser.UnifierType.qd) mdtfs = new UnificationQd(this, f_args_delete, f_restrict).UnifySections(mother, i_first_arg, candidates); else if (parser_config.unifier == Config.Parser.UnifierType.n_way) mdtfs = UnificationNWay.UnifySections(this, mother, i_first_arg, candidates, f_args_delete, f_restrict); else throw new Exception(); u_stats.RecordEvent(mdtfs); return mdtfs; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public bool QuickCheckMorph(Tfs tfs1, Tfs tfs2) { if (!parser_config.f_qc_morph || parser_config.qcs == null) return false; //stats.Morphology.QuickCheck.c_evaluated++; if (!parser_config.qcs.CanSkip(tfs1, tfs2)) return false; //stats.Morphology.QuickCheck.c_avoided++; return true; } public bool QuickCheckParse(Tfs tfs1, Tfs tfs2) { if (!parser_config.f_qc_parse || parser_config.qcs == null) return false; stats.Parsing.QuickCheck.c_evaluated++; if (!parser_config.qcs.CanSkip(tfs1, tfs2)) return false; stats.Parsing.QuickCheck.c_avoided++; return true; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// [DebuggerDisplay("{token.GetHashCode().ToString(\"X8\"),nq} {type.Name,nq} {System.String.Join(\".\",target_path),nq}")] struct ChartDependency : IEquatable<ChartDependency> { public LexicalAnalysis.AnalysisStack token; public Type type; public FsPath target_path; public bool Equals(ChartDependency other) { if (target_path == null && other.target_path != null) return false; return token == other.token && type == other.type && target_path.SequenceEqual(other.target_path, StringComparer.InvariantCultureIgnoreCase); } public override int GetHashCode() { int hc = 0; if (token != null) hc += token.GetHashCode(); if (type != null) hc += type.GetHashCode(); if (target_path != null) hc += target_path.Aggregate(0, (av, s) => av ^ s.GetHashCode()); return hc; } }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Get chart dependencies for the specified token, if any /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// IEnumerable<ChartDependency> GetDependencies(LexicalAnalysis.AnalysisStack tok) { Tfs fs = tok.Tfs; foreach (var cdp in parser_config.ChartDependencyPathsArray) { TfsSection ts = fs.GetSection(cdp.path1); if (ts != null) yield return new ChartDependency { token = tok, type = ts.Type, target_path = cdp.path2 }; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Filter tokens whose dependencies are not met out of the specified sequence /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// IEnumerable<IParseObj> ChartDependencyFilter() { Debug.Assert(parser_config.ChartDependencyPathsArray != null); if (parser_config.ChartDependencyScope == Config.Parser.DependencyScope.TwoWay) throw new NotImplementedException("two-way chart dependencies not implemented, use 'unidirectional-chart-dependencies' if possible"); var stacks = la.OfType<LexicalAnalysis.AnalysisStack>().ToArray(); /// Group all dependencies according to the introducing token var probation = stacks.SelectMany(tok => GetDependencies(tok)).Distinct().GroupBy(cd => cd.token); if (!probation.Any()) Nop.CodeCoverage(); /// Tokens which have no dependency are accepted. var satisfied = stacks.Except(probation.Select(grp => grp.Key)).ToHashSet(); /// eek. find those probationary tokens for which all of their chart dependencies have at least one satisfied /// token (in a non-overlapping chart position) containing a compatible type at the specified path. /// /// this was nice before I had to redo it when I realized that the satisfying tokens are not limited to the /// initial set, but must spread transitively as new ones become satisfied... int last_count; do { last_count = satisfied.Count; satisfied.UnionWith(probation .Where(grp => !satisfied.Contains(grp.Key) && grp.All(dpnd => satisfied .Where(other_tok => !other_tok.TokenSpan.Overlaps(grp.Key.TokenSpan)) .Any(other_tok => { TfsSection ts = other_tok.Tfs.GetSection(dpnd.target_path); if (ts == null) return false; int id1 = dpnd.type.m_id; int id2 = ts.Type.m_id; if (id1 == 0 || id2 == 0 || id1 == id2) return true; return tm.CanUnify(id1, id2); }))) .Select(tok_deps => tok_deps.Key)); } while (satisfied.Count > last_count); return satisfied; } }; }