using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Threading;
using System.Threading.Tasks;
using agree.Parse;
using glue;
using glue.Debugging;
using glue.Extensions.Enumerable;
using glue.Tokenization;

namespace agree
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// Drives parsing against a loaded grammar: selects the start symbols, configures the tokenizer from the parser
	/// configuration, performs lexical analysis, applies chart dependency filtering, and produces a ParseChart.
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public class GrammarParser
	{
		ParserConfiguration parser_config;
		Grammar g;
		StartSymbol[] starts;
		TextWriter tw_debug = null;
		ITokenizer tokenizer = null;

		public GrammarParser(ParserConfiguration parser_config, Grammar g)
		{
			this.parser_config = parser_config;
			this.g = g;
			this.starts = g.tm.AllEntries
							.OfType<StartSymbol>()
							.Union(g.tm.rgtt_start_symbols)
							.ToArray();
			ConfigureTokenizer();
			parser_config.PropertyChanged += new PropertyChangedEventHandler(OnParserConfigChanged);

			//hack hack for ERG
			var hackhack = new HashSet<String> { "root_strict", "root_informal", "root_frag", "root_inffrag" };
			if (g.tm.AllEntries.OfType<StartSymbol>().Any(ss => hackhack.Contains(ss.Name)))
			{
				this.starts = g.tm.AllEntries
								.OfType<StartSymbol>()
								.Where(ss => hackhack.Contains(ss.Name))
								.Union(g.tm.rgtt_start_symbols)
								.ToArray();
			}
			//hack hack (end)
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		///
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Grammar Grammar { get { return g; } }
		public TypeMgr TypeMgr { get { return g.tm; } }
		public Lexicon Lexicon { get { return g.lex; } }
		public Tray TargetTray { get { return g.loadtray; } }
		public TextWriter DebugOutput { get { return tw_debug; } }
		public ParserConfiguration Config { get { return parser_config; } }
		public ITokenizer Tokenizer { get { return tokenizer; } }

		public void SetDebugOutput(TextWriter tw)
		{
			this.tw_debug = tw;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// A single chart dependency introduced by a token: the type found at a configured source path in the token's
		/// feature structure, together with the target path at which a compatible type must appear in some other token.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		[DebuggerDisplay("{token.GetHashCode().ToString(\"X8\"),nq} {type.Name,nq} {System.String.Join(\".\",target_path),nq}")]
		struct ChartDependency : IEquatable<ChartDependency>
		{
			public LexicalAnalysis.AnalysisStack token;
			public Type type;
			public FsPath target_path;

			public bool Equals(ChartDependency other)
			{
				// Guard null paths before comparing element-wise; SequenceEqual would otherwise throw.
				if (target_path == null || other.target_path == null)
					return token == other.token && type == other.type && target_path == other.target_path;
				return token == other.token &&
						type == other.type &&
						target_path.SequenceEqual(other.target_path, StringComparer.InvariantCultureIgnoreCase);
			}

			public override int GetHashCode()
			{
				int hc = 0;
				if (token != null)
					hc += token.GetHashCode();
				if (type != null)
					hc += type.GetHashCode();
				if (target_path != null)
				{
					// Hash the path components with the same case-insensitive comparer used by Equals, so that
					// values which compare equal also hash identically.
					hc += target_path.Aggregate(0, (av, s) => av ^ StringComparer.InvariantCultureIgnoreCase.GetHashCode(s));
				}
				return hc;
			}
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Get chart dependencies for the specified token, if any
		/// </summary>
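		/// <remarks>
		/// For each configured dependency path pair, if the token's feature structure carries a type at the first
		/// path, the token acquires a dependency requiring that some other token, in a non-overlapping chart
		/// position, carry a unifiable type at the second (target) path; see ChartDependencyFilter below.
		/// </remarks>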
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		IEnumerable<ChartDependency> GetDependencies(LexicalAnalysis.AnalysisStack tok)
		{
			TfsEdge fs = tok.Contents;
			foreach (var cdp in Config.ChartDependencyPathsArray)
			{
				TfsEdge te;
				if (fs.Tray.GetTfsEdgeAtPath(fs.Edge, cdp.path1, out te))
				{
					yield return new ChartDependency
					{
						token = tok,
						type = te.Type,
						target_path = cdp.path2
					};
				}
			}
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Filter tokens whose dependencies are not met out of the specified sequence
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		IEnumerable<ParseChart.IParseChartToken> ChartDependencyFilter(IEnumerable<ParseChart.IParseChartToken> tokens)
		{
			if (Config.ChartDependencyPathsArray == null)
				return tokens;
			if (Config.ChartDependencyScope == ParserConfiguration.DependencyScope.TwoWay)
				throw new NotImplementedException("two-way chart dependencies not implemented, use 'unidirectional-chart-dependencies' if possible");

			var stacks = tokens.OfType<LexicalAnalysis.AnalysisStack>();

			/// Group all dependencies according to the introducing token
			var probation = stacks.SelectMany(tok => GetDependencies(tok)).Distinct().GroupBy(cd => cd.token);
			if (!probation.Any())
				Nop.CodeCoverage();

			/// Tokens which have no dependency are accepted.
			var satisfied = stacks.Except(probation.Select(grp => grp.Key)).ToHashSet();

			/// eek. find those probationary tokens for which all of their chart dependencies have at least one satisfied
			/// token (in a non-overlapping chart position) containing a compatible type at the specified path.
			///
			/// this was nice before I had to redo it when I realized that the satisfying tokens are not limited to the
			/// initial set, but must spread transitively as new ones become satisfied...
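			// Fixed-point iteration: keep admitting probationary tokens once every one of their dependencies is
			// discharged by an already-accepted token in a non-overlapping chart position whose type at the target
			// path can unify with the dependency's type; stop when a pass adds no new tokens.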
			int last_count;
			do
			{
				last_count = satisfied.Count;
				satisfied.UnionWith(probation
					.Where(grp => !satisfied.Contains(grp.Key) &&
							grp.All(dpnd => satisfied
								.Where(other_tok => !other_tok.ChartSpan.Overlaps(grp.Key.ChartSpan))
								.Any(other_tok =>
								{
									TfsEdge e;
									return other_tok.Contents.Tray.GetTfsEdgeAtPath(other_tok.Contents.Edge, dpnd.target_path, out e) &&
											g.tm.CanUnify(dpnd.type.m_id, e.Type.m_id);
								})))
					.Select(tok_deps => tok_deps.Key));
			}
			while (satisfied.Count > last_count);

			/// Delete edges for the stacks we aren't going to use
			foreach (var tok in stacks)
				if (!satisfied.Contains(tok))
					tok.Dispose();

			return satisfied;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Do not specify AttachedToParent (or the rethrown exception will propagate into the parent task, which is not
		/// accessible to this function) and do not specify the chart's CancelTok (or this task's act of cancelling the
		/// parse will put a task-canceled exception into the continuation, and trump/erase the actual exception)
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Task<ParseChart> Parse(String s)
		{
			return Parse(new TokenizedString(s, tokenizer));
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Run morpholexical analysis over the token set, verify that every minimal span of the input is covered by
		/// at least one analysis, and then start the chart parse.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Task<ParseChart> Parse(TokenSet ts)
		{
			using (var q = new ThreadPriorityRegion(ThreadPriority.AboveNormal))
			{
				LexicalAnalysis lah = Lexicon.AnalyzeHypothesis(ts, TargetTray);

				var missing = Enumerable.Range(0, lah.SourceItem.MinimalSpanCount)
										.Except(lah.UnionMany(ta => ta.ChartSpan.Range));
				if (missing.Any())
				{
					String s1 = Span.FromIndexList(missing)
									.Select(sp => String.Format("\"{0}\" {1}",
											ts.Source.MinimalSpanText(sp),
											ts.Source.MinimalSpanToCharacterSpan(sp)))
									.StringJoin(", ");
					throw new ParseException("No morpholexical solutions for: {0}.", s1);
				}

				return Parse(ts.Source.Text, lah.ChartSize, lah, ts);
			}
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Build the parse chart over the (chart-dependency-filtered) tokens and run the parse asynchronously. If the
		/// parse faults, the chart is disposed and the most specific ParseException (or the flattened aggregate) is
		/// rethrown.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Task<ParseChart> Parse(String source_text, int chart_size, IEnumerable<ParseChart.IParseChartToken> tokens, TokenSet _ts)
		{
			using (var q = new ThreadPriorityRegion(ThreadPriority.AboveNormal))
			{
				ParseChart chart = new ParseChart(
									this,
									TargetTray,
									source_text,
									starts,
									chart_size,
									ChartDependencyFilter(tokens),
									_ts);
				if (tw_debug != null)
					chart.SetDebugOutput(tw_debug);

				//hack
				LexicalAnalysis lah = tokens as LexicalAnalysis;
				if (lah != null)
				{
					chart.lah = lah;
				}

				return chart.ParseAsync().ContinueWith<ParseChart>(t =>
				{
					if (t.Exception != null)
					{
						chart.Dispose();
						AggregateException aex = t.Exception.Flatten();
						var rgex = aex.InnerExceptions.OfType<ParseException>().Distinct(e => e.Id).ToArray();
						if (rgex.Length == 1)
							throw rgex[0];
						throw aex;
					}
					//g.parses.Add(chart);
					return chart;
				},
				CancellationToken.None,
				TaskContinuationOptions.ExecuteSynchronously,
				TaskScheduler.Default);
			}
		}
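		// Illustrative usage sketch (hypothetical caller code, not part of this class): 'config' and 'grammar' stand
		// in for an already-loaded ParserConfiguration and Grammar. Parse returns a Task<ParseChart>, which the
		// caller awaits (or blocks on) to obtain the completed chart.
		//
		//     GrammarParser parser = new GrammarParser(config, grammar);
		//     Task<ParseChart> task = parser.Parse("the dog barks");
		//     ParseChart chart = task.Result;      // or 'await task' in an async context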
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Rebuild the tokenizer when the configured tokenizer type changes.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		void OnParserConfigChanged(object sender, PropertyChangedEventArgs e)
		{
			switch (e.PropertyName)
			{
				case "TokenizerType":
					ConfigureTokenizer();
					break;
			}
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Instantiate the tokenizer indicated by the parser configuration: default to SpaceCharTokenizer when none is
		/// configured, prefer a parameterless constructor, or fall back to a constructor taking a lexicon predicate.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		void ConfigureTokenizer()
		{
			System.Type T_tok = parser_config.TokenizerType;
			ConstructorInfo ci;
			if (T_tok == null)
			{
				tokenizer = new SpaceCharTokenizer();
			}
			else if (T_tok.GetConstructor(new System.Type[0]) != null)
			{
				tokenizer = (ITokenizer)Activator.CreateInstance(T_tok);
			}
			else if ((ci = T_tok.GetConstructor(new System.Type[] { typeof(Func<String, bool>) })) != null)
			{
				Func<String, bool> af = w => Lexicon[w].Any(q => q.Lemmata.Count == 1);
				tokenizer = (ITokenizer)ci.Invoke(new Object[] { af });
			}
			else
			{
				throw new Exception(String.Format("Could not determine how to construct the tokenizer '{0}'", T_tok.Name));
			}
		}
	};
}
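// Illustrative sketch (hypothetical, not part of this assembly): a tokenizer type that ConfigureTokenizer above would
// build through its Func<String, bool> constructor path, since it exposes no parameterless constructor. The predicate
// it receives reports whether any lexical entry for the given word form has exactly one lemma.
//
//     public class LexiconAwareTokenizer : ITokenizer
//     {
//         readonly Func<String, bool> is_simple_lexeme;
//         public LexiconAwareTokenizer(Func<String, bool> is_simple_lexeme) { this.is_simple_lexeme = is_simple_lexeme; }
//         /* ITokenizer members would go here */
//     }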