using System;
using System.Text;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters.Binary;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Collections.ObjectModel;
using System.Threading.Tasks;
using glue.Collections.ReadOnly;
using glue.Extensions.Enumerable;
using glue.Tokenization;

namespace agree
{
	public interface IGrammar : ISysObj
	{
	};

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// A loaded grammar: type hierarchy, lexicon, rules, node labeler, and parser. Can be loaded either from a set of
	/// TDL/configuration files or from a previously saved binary image (see <see cref="TryLoadBinary"/> / <see cref="Save"/>).
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public partial class Grammar : IGrammar
	{
		/// Signature bytes identifying a compiled binary grammar file (trailing 0x1A = DOS EOF guard).
		static readonly byte[] binary_signature = Encoding.ASCII.GetBytes("gee-grammar-binary\x1A");

		public TypeMgr tm;
		public Lexicon lex;

		readonly SysObj so;
		public SysObj SysObj { get { return so; } }

		readonly String s_name;
		public String SysObjName { get { return s_name; } }

		String s_description;
		public string SysObjDescription { get { return s_description; } }

		String s_author;
		public string Author { get { return s_author; } }

		GrammarNameResolver nr;
		public GrammarNodeLabeler nl;
		GrammarParser gp;

		//internal HashSet<ParseChart> parses = new HashSet<ParseChart>();	// beware of this GC-root

		bool f_loaded = false;

		/// Raised once, after a successful <see cref="Load(CommandToken, String)"/>.
		internal event Action<CommandToken, Grammar> LoadedEvent = null;

		public bool IsLoaded { get { return f_loaded; } }

		/// Tray holding the grammar's canonical TFSs, allocated during load.
		public Tray loadtray;

		public Rule[] _rules;
		public GrammarRule[] _grammar_rules;
		public LexicalRule[] _lexical_rules;

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Grammar constructors
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Grammar(SysObj so, String name)
		{
			this.nr = new GrammarNameResolver(this);
			this.so = so;
			this.s_name = name;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Examine the specified file. If the signature of a binary grammar file is found, load it. Otherwise, assume
		/// it's a script file that lists the component configuration and TDL files.
		/// </summary>
		/// <param name="tx">Command/transaction context used for status reporting.</param>
		/// <param name="filename">Path to a binary grammar image or grammar script; a ".gee" extension is tried as a fallback.</param>
		/// <exception cref="InvalidOperationException">The grammar was already loaded.</exception>
		/// <exception cref="FileNotFoundException">Neither the given file nor its ".gee" variant exists.</exception>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public void Load(CommandToken tx, String filename)
		{
			if (f_loaded)
				throw new InvalidOperationException("grammar already loaded");

			filename = Path.GetFullPath(filename);
			if (!File.Exists(filename) && !File.Exists(filename = Path.ChangeExtension(filename, ".gee")))
			{
				String msg = String.Format("The file '{0}' could not be found.", filename);
				throw new FileNotFoundException(msg, filename);
			}

			Stopwatch stopw = Stopwatch.StartNew();

			tx.TransactionStatus("Begin loading '<span style='color:#008000;'>{0}</span>'.", filename);

			if (!TryLoadBinary(tx, filename))
			{
				GrammarFileSet tdlg = new GrammarFileSet(filename);
				this.Load(tx, tdlg);
			}

			/// Initialize the parsing component for this grammar. This parser will use this Grammar's TypeManager,
			/// GrammarRules, Lexicon, and the StartSymbols from tm.AllEntries.
			tx.TransactionStatus("Initialize parser.");

			//RuntimeNodeLabelConfig rnlc = new RuntimeNodeLabelConfig(tm, tm.config.NodeLabelConfiguration);
			nl = new GrammarNodeLabeler(
					tm.config.NodeLabelConfiguration,
					tm.AllEntries.OfType<NodeLabelTemplate>(),
					tm.AllEntries.OfType<NodeMetaTemplate>());

			gp = new GrammarParser(tm.config.ParserConfiguration, this);

			f_loaded = true;

			// Raise the event via the local copy: invoking the field after the null check races with
			// unsubscription (the original invoked LoadedEvent directly, defeating the captured-copy guard).
			Action<CommandToken, Grammar> ev = LoadedEvent;
			if (ev != null)
				ev(tx, this);

			tx.TransactionStatus("Loaded '<span style='color:#008000;'>{0}</span>' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Load grammar from a set of token lists
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		void Load(CommandToken tx, GrammarFileSet tdlg)
		{
			s_description = tdlg.description ?? s_name;
			s_author = tdlg.author ?? "";

			/// Create a type hierarchy
			tm = new TypeMgr(this, tdlg.config);

			/// Populate type hierarchy from the specified tokens
			tm.LoadTypeHierarchyFromTdl(tx, tdlg);

			/// Create a default tray for the grammar's canonical TFSs
			loadtray = TrayMgr.Allocate<ConcurrentTray>(tm, 1, -1);
			tm.config.CompileQuickCheckPaths(loadtray, tdlg.quick_check_paths);
			loadtray.CompileGrammarPaths(tm.config);

			/// Create and persist definition TFSs for all types
			tx.TransactionStatus("Parse type definitions.");
			ParseTypeDefinitions(tx);

			/// Gather and group the various entry types
			tm.LoadEntriesFromTdl(tx, loadtray, tdlg);

			/// Create and persist TFSs for authored definitions
			tx.TransactionStatus("Parse entry definitions.");
			ParseEntryDefinitions(tx);

			/// no more strings to add. freeze the type hierarchy
			tm.Petrify(loadtray /*temp*/);

#if FCTC_STATS
			tm.TypePatternReport();
#endif
			// gp loading needs some. todo: make Task<T>
			ExpandTypeDefinitions(tx);

#if FCTC_STATS
			tm.TypePatternReport();
#endif
			tx.TransactionStatus("Build rule compatibility matrix.");
			AnalyzeRuleCompatibility();

			Console.Write("scan lexicon...");

			/// Create and initialize the lexicon.
			lex = new Lexicon(this, tm.AllEntries.OfType<LexicalEntry>(), tdlg.irregs);

			Console.WriteLine("done.");
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Partition the loaded rules into lexical and grammar rules, cache each rule's daughters, and build the
		/// pairwise rule compatibility matrix used to prune the parse chart.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		void AnalyzeRuleCompatibility()
		{
			Console.Write("build rule compatibility matrix...");

			_rules = tm.AllEntries.OfType<Rule>().ToArray();
			_lexical_rules = _rules.OfType<LexicalRule>().ToArray();
			_grammar_rules = _rules.OfType<GrammarRule>().ToArray();

			/// Cache rule daughters in each rule and find maximum arity
			foreach (Rule r in _rules)
				r.InitializeDaughters();

			/// Check rule compatibility. Lexical rules are checked against all rules; grammar rules
			/// against grammar rules (with the full rule set for context).
			foreach (LexicalRule r in _lexical_rules)
				r.AnalyzeRuleCompatibility(_rules, null);

			foreach (GrammarRule r in _grammar_rules)
				r.AnalyzeRuleCompatibility(_grammar_rules, _rules);
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Load a grammar that was saved in binary format
		/// </summary>
		/// <returns>true if the file carried the binary grammar signature and was loaded; false if it is not a binary image.</returns>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		bool TryLoadBinary(CommandToken tx, String filename)
		{
			using (FileStream str = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
			using (BinaryReader br = new BinaryReader(str))
			{
				byte[] sig_check = new byte[binary_signature.Length];
				// BinaryReader.Read may return fewer bytes than requested (e.g. for a file shorter than the
				// signature); the original ignored the count. A short read cannot be a binary grammar.
				int cb = br.Read(sig_check, 0, sig_check.Length);
				if (cb != sig_check.Length || !sig_check.SequenceEqual(binary_signature))
					return false;

				// SECURITY: BinaryFormatter deserialization of untrusted files is unsafe (arbitrary code
				// execution); only load grammar images from trusted sources. Flagged for future migration.
				BinaryFormatter bf = new BinaryFormatter();

				/// read configuration options
				GrammarConfig config = (GrammarConfig)bf.Deserialize(br.BaseStream);

				/// Create a type hierarchy
				tm = new TypeMgr(this, config);
				//todo: config.CompileQuickCheckPaths(loadtray, null);
				// NOTE(review): loadtray appears to still be null at this point on the binary path (it is only
				// assigned below, from the first expanded type) — confirm CompileGrammarPaths tolerates this.
				loadtray.CompileGrammarPaths(config);

				/// Load compiled grammar
				tm.LoadBinary(tx, br);
				f_expanded_all = true;

				// temp: recover the load tray from the first type that has an expanded TFS
				this.loadtray = tm.AllTypes.Select(t => t.Expanded).First(te => te != default(TfsEdge)).Tray;

				/// read lexicon
				lex = Lexicon.Load(this, tm.AllEntries.OfType<LexicalEntry>(), br);
			}
			return true;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Save this grammar in binary format
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public void Save(CommandToken tx, String filename)
		{
			Stopwatch stopw = Stopwatch.StartNew();

			tx.TransactionStatus("Begin writing binary format '\ue099-008000{0}\ue099'.", filename);

			using (Stream str = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None))
			using (BinaryWriter bw = new BinaryWriter(str))
			{
				/// write grammar signature
				bw.Write(binary_signature);

				/// write configuration options
				BinaryFormatter bf = new BinaryFormatter();
				bf.Serialize(bw.BaseStream, tm.config);

				/// Expand all types prior to saving
				ExpandTypeDefinitions(tx);

				/// write type manager
				tm.Save(tx, bw);

				/// write lexicon
				bf.Serialize(bw.BaseStream, lex);
			}

			tx.TransactionStatus("Wrote '\ue099-008000{0}\ue099' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// The parser instance created during <see cref="Load(CommandToken, String)"/>; null until the grammar is loaded.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		public GrammarParser Parser { get { return gp; } }

		/// <summary>
		/// Build an HTML summary of grammar statistics (type, rule, and edge counts).
		/// </summary>
		public String GetInfo()
		{
			StringBuilder sb = new StringBuilder();
			sb.AppendFormat("<b>Information for '{0}':</b><br />", SysObjName);
			sb.Append("<br />");
			sb.AppendFormat("Number of authored types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.code_size);
			sb.AppendFormat("Number of GLB types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.type_arr.Length - tm.code_size);
			sb.AppendFormat("Total types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.type_arr.Length);
			sb.Append("<br />");
			sb.AppendFormat("Number of grammar rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is GrammarRule));
			// exact type match: MorphologicalRule (counted separately below) presumably derives from LexicalRule
			sb.AppendFormat("Number of lexical rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e.GetType() == typeof(LexicalRule)));
			sb.AppendFormat("Number of inflection rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is MorphologicalRule));
			sb.AppendFormat("Number of start symbols: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is StartSymbol));
			sb.AppendFormat("Number of node labels: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is NodeLabel));
			//sb.AppendFormat("Number of lexicon entries: <span style='color:#008000;'>{0:#,#}</span><br />", this.lex.Count);
			sb.Append("<br />");
			sb.AppendFormat("Number of edges: <span style='color:#008000;'>{0:#,#}</span><br />", loadtray.PoolMarkCount);
			return sb.ToString();
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Load (parse) the authored definition of every type into the load tray, then protect the tray.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public void ParseTypeDefinitions(CommandToken tx)
		{
			tx.TransactionStatus("Loading definitions for all Types");
#if false
			Parallel.ForEach(AllTypes, t => t.LoadDefinition());
#else
			foreach (Instance t in tm.AllTypes)
			{
				t.LoadDefinition(loadtray);
			}
#endif
			loadtray.Protect();
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Load (parse) the authored definition of every entry into the load tray, then protect the tray.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public void ParseEntryDefinitions(CommandToken tx)
		{
			tx.TransactionStatus("loading definitions for all Entries");
#if false
			Parallel.ForEach(tm.AllTypes, t => t.LoadDefinition());
#else
			foreach (Instance t in tm.AllEntries)
				t.LoadDefinition(loadtray);
#endif
			loadtray.Protect();
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Expand every type definition exactly once, then protect and (if supported) freeze the load tray.
		/// Idempotent via <see cref="f_expanded_all"/> (though the status message is reported on every call).
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public bool f_expanded_all = false;

		public void ExpandTypeDefinitions(CommandToken tx)
		{
			tx.TransactionStatus("Expanding all type definitions.");
			if (f_expanded_all)
				return;
#if CONCURRENT
			Parallel.ForEach(AllTypes, t => { Edge e = t.Expanded; });
#else
			// Accessing Type.Expanded forces the expansion as a side effect; the value itself is discarded.
			TfsEdge e;
			foreach (Type t in tm.AllTypes)
			{
				e = t.Expanded;
			}
#endif
			loadtray.Protect();
			if (loadtray is IFreezableTray)
				((IFreezableTray)loadtray).Freeze();
			f_expanded_all = true;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Parse entry points. If the grammar has not been loaded, the string overload completes with a null chart;
		/// NOTE(review): the other overloads dereference the (null) parser instead — confirm whether they should guard too.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Task<ParseChart> Parse(String sent)
		{
			if (gp == null)
			{
				var tcs = new TaskCompletionSource<ParseChart>();
				tcs.SetResult(null);
				return tcs.Task;
			}
			return gp.Parse(sent);
		}

		public Task<ParseChart> Parse(TokenSet ts)
		{
			return gp.Parse(ts);
		}

		public Task<ParseChart> Parse(String source_text, int chart_size, IEnumerable<ParseChart.IParseChartToken> tokens)
		{
			return gp.Parse(source_text, chart_size, tokens, null);
		}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public HashSet<PoolMark> Reachable() { HashSet<PoolMark> seen = new HashSet<PoolMark>(); foreach (Instance t in tm.AllInstances) { if (t.m_flags.HasFlag(Type.Flags.LoadedDefinition)) { seen.UnionWith(t.Definition.PoolMarksBelow); if (t.m_flags.HasFlag(Type.Flags.Expanded) && !t.Expanded.Equals(t.Definition)) seen.UnionWith(t.Expanded.PoolMarksBelow); } } return seen; } public IReadOnlyDictionary<String, ISysObj> SysObjChildren { get { return new GrammarNameResolver(this); } } public ISysObj SysObjParent { get { return so; } } class GrammarNameResolver : IReadOnlyDictionary<String, ISysObj> { Grammar g; public GrammarNameResolver(Grammar g) { this.g = g; } public bool IsReadOnly { get { return true; } } public bool ContainsKey(String key) { ISysObj o; return TryGetValue(key, out o); } public ICollection<String> Keys { get { return new ReadOnlyCollection<String>(this.Select(kvp => kvp.Key).ToArray()); } } public ICollection<ISysObj> Values { get { return new ReadOnlyCollection<ISysObj>(this.Select(kvp => kvp.Value).ToArray()); } } public ISysObj this[String key] { get { ISysObj so; if (!TryGetValue(key, out so)) throw new KeyNotFoundException(); return so; } } public int Count { get { int c = 0; c += g.tm.type_dict.Count; c += g.tm.entry_dict.Count; //c += g.parses.Count; return c; } } public bool TryGetValue(String key, out ISysObj value) { Type t; if (g.tm.type_dict.TryGetValue(key, out t)) { value = t; return true; } Entry e; if (g.tm.entry_dict.TryGetValue(key, out e)) { value = e; return true; } //ParseChart pc = g.parses.FirstOrDefault(c => c.SysObjName == key); //if (pc != null) //{ // value = pc; // return true; //} value = null; return false; } public IEnumerator<KeyValuePair<String, ISysObj>> GetEnumerator() { 
foreach (Type t in g.tm.type_dict.Values) yield return new KeyValuePair<String, ISysObj>(t.SysObjName, t); foreach (Entry e in g.tm.entry_dict.Values) yield return new KeyValuePair<String, ISysObj>(e.SysObjName, e); //foreach (ParseChart pc in g.parses) // yield return new KeyValuePair<String, ISysObj>(pc.SysObjName, pc); } public bool Contains(KeyValuePair<String, ISysObj> item) { return this.Any(kvp => kvp.Key == item.Key && kvp.Value == item.Value); } public void CopyTo(KeyValuePair<String, ISysObj>[] array, int arrayIndex) { foreach (var kvp in this) array[arrayIndex++] = kvp; } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } }; }; }