using System;
using System.Text;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters.Binary;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

using miew.ReadOnly;
using miew.Enumerable;
using miew.Tokenization;

namespace agree
{
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// A grammar: holds the configuration, type manager, lexicon, rule arrays and start symbols, and
    /// knows how to load itself from a TDL file set or a binary image, save itself in binary form, and
    /// forward parse requests to its <see cref="Submitter"/>.
    /// </summary>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public partial class Grammar : ISysObj
    {
        /// <summary>Byte sequence written at the start of a binary grammar file and checked on load.</summary>
        static readonly byte[] binary_signature = Encoding.ASCII.GetBytes("gee-grammar-binary\x1A");

        public Config config;
        public TypeMgr tm;
        public Lexicon lex;

        readonly SysObj so;
        public SysObj SysObj { get { return so; } }

        readonly String s_name;
        public String SysObjName { get { return s_name; } }

        String s_description;
        public string SysObjDescription { get { return s_description; } }

        String s_author;
        public string Author { get { return s_author; } }

        /// <summary>Name resolver created in the constructor; served by <see cref="SysObjChildren"/>.</summary>
        GrammarNameResolver nr;
        public GrammarNodeLabeler nl;
        public Submitter sub;

        //internal HashSet<ParseChart> parses = new HashSet<ParseChart>();		// beware of this GC-root

        bool f_loaded = false;
        internal event Action<CommandToken, Grammar> LoadedEvent = null;

        /// <summary>True once <see cref="Load(CommandToken, String)"/> has run to completion.</summary>
        public bool IsLoaded { get { return f_loaded; } }

        public Rule[] _rules;
        public GrammarRule[] _grammar_rules;
        public LexicalRule[] _lexical_rules;
        public StartSymbol[] StartSymbols;

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Grammar constructors
        /// </summary>
        /// <param name="so">Parent system object; returned by <see cref="SysObj"/> and <see cref="SysObjParent"/>.</param>
        /// <param name="name">Name reported by <see cref="SysObjName"/>.</param>
        /// <param name="config">Configuration to use; a default <see cref="Config"/> is created when null.</param>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public Grammar(SysObj so, String name, Config config)
        {
#if DEBUG
            _singleton = this;
#endif
            this.config = config ?? new Config();
            this.so = so;
            this.s_name = name;
            this.nr = new GrammarNameResolver(this);
        }

        //public Grammar(CommandToken tx, String name, String filename)
        //    : this(tx.SystemInstance, name)
        //{
        //    this.Load(tx, filename);
        //}

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Examine the specified file. If the signature of a binary grammar file is found, load it. Otherwise, assume
        /// it's a script file that lists the component configuration and TDL files
        /// </summary>
        /// <param name="tx">Command token used for status reporting and passed to the loaders.</param>
        /// <param name="filename">
        /// Path of the grammar file. If it does not exist, the same path with a ".gee" extension is tried.
        /// </param>
        /// <exception cref="InvalidOperationException">The grammar has already been loaded.</exception>
        /// <exception cref="FileNotFoundException">Neither the given path nor its ".gee" variant exists.</exception>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public void Load(CommandToken tx, String filename)
        {
            if (f_loaded)
                throw new InvalidOperationException("grammar already loaded");

            String requested = Path.GetFullPath(filename);
            filename = requested;
            if (!File.Exists(filename))
            {
                // Fall back to the ".gee" extension; on failure report the path the caller actually asked for.
                filename = Path.ChangeExtension(requested, ".gee");
                if (!File.Exists(filename))
                {
                    String msg = String.Format("The file '{0}' could not be found.", requested);
                    throw new FileNotFoundException(msg, requested);
                }
            }

            Stopwatch stopw = Stopwatch.StartNew();
            tx.TransactionStatus("Begin loading '<span style='color:#008000;'>{0}</span>'.", filename);

            if (!TryLoadBinary(tx, filename))
            {
                GrammarFileSet tdlg = new GrammarFileSet(this.config, filename);
                this.Load(tx, tdlg);
            }

            // the following needs the lexicon to initialize the tokenizer
            this.sub = new Submitter(config, this);

            // Initialize the parsing component for this grammar. This parser will use this Grammar's TypeManager,
            // GrammarRules, Lexicon, and the StartSymbols from tm.AllEntries.
            tx.TransactionStatus("Initialize parser.");

            //RuntimeNodeLabelConfig rnlc = new RuntimeNodeLabelConfig(tm, config._NodeLabelConfiguration);
            nl = new GrammarNodeLabeler(
                this,
                config.nodeLabels,
                tm.AllEntries.OfType<NodeLabelTemplate>(),
                tm.AllEntries.OfType<NodeMetaTemplate>());

            StartSymbols = tm.AllEntries
                                .OfType<StartSymbol>()
                                .Where(ss => config.grammar.start_symbols.Contains(ss.Name))
                                .ToArray();

            // Expand start symbols. The property is read only for its effect; the value is discarded.
            foreach (StartSymbol ss in StartSymbols)
            {
                Tfs te_ss = ss.Expanded;
            }

            f_loaded = true;

            // Invoke the local copy, not the field, so an unsubscribe between the null check and the
            // call cannot produce a null dereference.
            Action<CommandToken, Grammar> ev = LoadedEvent;
            if (ev != null)
                ev(tx, this);

            tx.TransactionStatus("Loaded '<span style='color:#008000;'>{0}</span>' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Load grammar from a set of token lists
        /// </summary>
        /// <param name="tx">Command token used for status reporting and passed to the loaders.</param>
        /// <param name="tdlg">File set supplying description, author, TDL content and irregular forms.</param>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        void Load(CommandToken tx, GrammarFileSet tdlg)
        {
            s_description = tdlg.description ?? s_name;
            s_author = tdlg.author ?? "";

            // Create a type hierarchy
            tm = new TypeMgr(this);

            // Populate type hierarchy from the specified tokens
            tm.LoadTypeHierarchyFromTdl(tx, tdlg);

            config.parser.CompileGrammarPaths(tm);

            // Create and persist definition TFSs for all types
            tx.TransactionStatus("Parse type definitions.");
            ParseTypeDefinitions(tx);

            // Gather and group the various entry types
            tm.LoadEntriesFromTdl(tx, tdlg);

            // Create and persist TFSs for authored definitions
            tx.TransactionStatus("Parse entry definitions.");
            ParseEntryDefinitions(tx);

            // no more strings to add. freeze the type hierarchy
            tm.Petrify();

#if FCTC_STATS
            tm.TypePatternReport();
#endif
            // gp loading needs some. todo: make Task<T>
            ExpandTypeDefinitions(tx);

#if FCTC_STATS
            tm.TypePatternReport();
#endif
            if (f_garbage)
                Console.WriteLine(GarbageReport());

            if (regress_file != null)
                tm.RegressionTest(Console.Out, regress_file, false);

            tx.TransactionStatus("Check rule compatibility.");
            AnalyzeRuleCompatibility();

            Console.Write("scan lexicon...");
            // Create and initialize the lexicon.
            lex = new Lexicon(this, tm.AllEntries.OfType<LexicalEntry>(), tdlg.irregs);
            Console.WriteLine("done.");
        }

        /// <summary>When non-null, a regression test is run against this file during a TDL load.</summary>
        public static String regress_file = null;

        /// <summary>When true, the garbage report is written to the console during a TDL load.</summary>
        public static bool f_garbage = false;

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Collects the rule arrays from the type manager's entries and runs the compatibility analysis on
        /// every rule: lexical rules against all rules, grammar rules against the grammar rules and all rules.
        /// Runs in parallel when config.system.MultiThreading is set.
        /// </summary>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        void AnalyzeRuleCompatibility()
        {
            using (var ttr = new miew.Debugging.TimingReport(Console.Out, "Check rule compatibility."))
            {
                _rules = tm.AllEntries.OfType<Rule>().ToArray();
                _lexical_rules = _rules.OfType<LexicalRule>().ToArray();
                _grammar_rules = _rules.OfType<GrammarRule>().ToArray();

                // Check rule compatibility
                if (config.system.MultiThreading)
                {
                    Parallel.ForEach(_rules, r =>
                    {
                        if (r is LexicalRule)
                            r.AnalyzeRuleCompatibility(_rules, null);
                        else if (r is GrammarRule)
                            r.AnalyzeRuleCompatibility(_grammar_rules, _rules);
                    });
                }
                else
                {
                    foreach (LexicalRule r in _lexical_rules)
                        r.AnalyzeRuleCompatibility(_rules, null);

                    foreach (GrammarRule r in _grammar_rules)
                        r.AnalyzeRuleCompatibility(_grammar_rules, _rules);
                }
            }
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Load a grammar that was saved in binary format
        /// </summary>
        /// <param name="tx">Command token passed to the type manager's binary loader.</param>
        /// <param name="filename">Path of an existing file to examine.</param>
        /// <returns>
        /// False when the file does not begin with the binary grammar signature (nothing is loaded);
        /// true after the configuration, type manager and lexicon have been read from it.
        /// </returns>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        bool TryLoadBinary(CommandToken tx, String filename)
        {
            using (FileStream str = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (BinaryReader br = new BinaryReader(str))
            {
                // ReadBytes keeps reading until the requested count or end of stream, so a file shorter
                // than the signature yields a shorter array and simply fails the comparison.
                byte[] sig_check = br.ReadBytes(binary_signature.Length);
                if (!sig_check.SequenceEqual(binary_signature))
                    return false;

                // NOTE(review): BinaryFormatter deserialization is unsafe on untrusted input; only load
                // binary grammar files from trusted sources.
                BinaryFormatter bf = new BinaryFormatter();

                // read configuration options
                config = (Config)bf.Deserialize(br.BaseStream);

                // Create a type hierarchy
                tm = new TypeMgr(this);

                //todo:
                config.parser.CompileGrammarPaths(tm);

                // Load compiled grammar
                tm.LoadBinary(tx, br);

                f_expanded_all = true;

                // read lexicon
                lex = Lexicon.Load(this, tm.AllEntries.OfType<LexicalEntry>(), br);
            }
            return true;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Save this grammar in binary format
        /// </summary>
        /// <param name="tx">Command token used for status reporting and passed to the writers.</param>
        /// <param name="filename">Path of the file to create (an existing file is overwritten).</param>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public void Save(CommandToken tx, String filename)
        {
            Stopwatch stopw = Stopwatch.StartNew();
            tx.TransactionStatus("Begin writing binary format '\ue099-008000{0}\ue099'.", filename);

            using (Stream str = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None))
            using (BinaryWriter bw = new BinaryWriter(str))
            {
                // write grammar signature
                bw.Write(binary_signature);

                // write configuration options
                BinaryFormatter bf = new BinaryFormatter();
                bf.Serialize(bw.BaseStream, config);

                // Expand all types prior to saving
                ExpandTypeDefinitions(tx);

                // write type manager
                tm.Save(tx, bw);

                // write lexicon
                bf.Serialize(bw.BaseStream, lex);
            }

            tx.TransactionStatus("Wrote '\ue099-008000{0}\ue099' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Builds an HTML fragment summarizing type and entry counts for this grammar.
        /// </summary>
        /// <returns>The HTML text.</returns>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public String GetInfo()
        {
            // The "#,0" format is used rather than "#,#" because "#" alone renders a zero count as an
            // empty string.
            StringBuilder sb = new StringBuilder();
            sb.AppendFormat("<b>Information for '{0}':</b><br />", SysObjName);
            sb.Append("<br />");
            sb.AppendFormat("Number of authored types: <span style='color:#008000;'>{0:#,0}</span><br />", tm.code_size);
            sb.AppendFormat("Number of GLB types: <span style='color:#008000;'>{0:#,0}</span><br />", tm.type_arr.Length - tm.code_size);
            sb.AppendFormat("Total types: <span style='color:#008000;'>{0:#,0}</span><br />", tm.type_arr.Length);
            sb.Append("<br />");
            sb.AppendFormat("Number of grammar rules: <span style='color:#008000;'>{0:#,0}</span><br />", this.tm.AllEntries.Count(e => e is GrammarRule));
            // Exact-type match: entries of types derived from LexicalRule are not counted here.
            sb.AppendFormat("Number of lexical rules: <span style='color:#008000;'>{0:#,0}</span><br />", this.tm.AllEntries.Count(e => e.GetType() == typeof(LexicalRule)));
            sb.AppendFormat("Number of inflection rules: <span style='color:#008000;'>{0:#,0}</span><br />", this.tm.AllEntries.Count(e => e is MorphologicalRule));
            sb.AppendFormat("Number of start symbols: <span style='color:#008000;'>{0:#,0}</span><br />", this.tm.AllEntries.Count(e => e is StartSymbol));
            sb.AppendFormat("Number of node labels: <span style='color:#008000;'>{0:#,0}</span><br />", this.tm.AllEntries.Count(e => e is NodeLabel));
            //sb.AppendFormat("Number of lexicon entries: <span style='color:#008000;'>{0:#,#}</span><br />", this.lex.Count);
            sb.Append("<br />");
            //sb.AppendFormat("Number of edges: <span style='color:#008000;'>{0:#,#}</span><br />", loadtray.PoolMarkCount);
            return sb.ToString();
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Reads the Definition property of every type in tm.AllTypes. The value is discarded; the read
        /// is presumably what triggers parsing of the definition (not visible from this file).
        /// </summary>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public void ParseTypeDefinitions(CommandToken tx)
        {
            tx.TransactionStatus("Loading definitions for all Types");

            foreach (Instance t in tm.AllTypes)
            {
                Tfs tfs = t.Definition;
            }
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Reads the Definition property of every entry in tm.AllEntries, in parallel when
        /// config.system.MultiThreading is set. The value is discarded.
        /// </summary>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public void ParseEntryDefinitions(CommandToken tx)
        {
            tx.TransactionStatus("loading definitions for all Entries");

            if (config.system.MultiThreading)
            {
                Parallel.ForEach(tm.AllEntries, t =>
                {
                    Tfs tfs = t.Definition;
                });
            }
            else
            {
                foreach (Instance t in tm.AllEntries)
                {
                    Tfs tfs = t.Definition;
                }
            }
        }

        /// <summary>
        /// Set once all types have been expanded, or when the grammar was loaded from a binary image;
        /// makes <see cref="ExpandTypeDefinitions"/> a no-op.
        /// </summary>
        public bool f_expanded_all = false;

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Reads the Expanded property of every type in tm.AllTypes (in parallel when
        /// config.system.MultiThreading is set) unless <see cref="f_expanded_all"/> is already set,
        /// then sets that flag. The status message is emitted even when no work is done.
        /// </summary>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public void ExpandTypeDefinitions(CommandToken tx)
        {
            tx.TransactionStatus("Expanding all type definitions.");

            if (f_expanded_all)
                return;

            if (config.system.MultiThreading)
            {
                Parallel.ForEach(tm.AllTypes, t =>
                {
                    Tfs tfs = t.Expanded;
                });
            }
            else
            {
                foreach (Type t in tm.AllTypes)
                {
                    Tfs tfs = t.Expanded;
                }
            }

            f_expanded_all = true;
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Submits a sentence for parsing.
        /// </summary>
        /// <param name="sent">Text to parse.</param>
        /// <returns>
        /// The submitter's parse task, or an already-completed task with a null result when no
        /// submitter has been created yet.
        /// </returns>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public Task<ParseControl> Parse(String sent)
        {
            if (sub == null)
            {
                var tcs = new TaskCompletionSource<ParseControl>();
                tcs.SetResult(null);
                return tcs.Task;
            }
            return sub.Parse(sent);
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Submits a token set for parsing.
        /// </summary>
        /// <param name="ts">Tokens to parse.</param>
        /// <returns>
        /// The submitter's parse task, or an already-completed task with a null result when no
        /// submitter has been created yet (same convention as the String overload).
        /// </returns>
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        public Task<ParseControl> Parse(TokenSet ts)
        {
            if (sub == null)
            {
                var tcs = new TaskCompletionSource<ParseControl>();
                tcs.SetResult(null);
                return tcs.Task;
            }
            return sub.Parse(ts);
        }

        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        ///
        /// chart_size is not necessarily the same as the number of tokens, so don't try to combine them into an ICollection...
        /// TokenSet can be used for a single representation.
        ///
        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        //public Task<ParseChart> Parse(String source_text, int chart_size, IEnumerable<IParseObj> tokens)
        //{
        //    return gp.Parse(source_text, chart_size, tokens);
        //}

        /// <summary>Name-keyed view over this grammar's types and entries.</summary>
        public IReadOnlyDictionary<String, ISysObj> SysObjChildren
        {
            // The resolver holds only a back-reference to this grammar, so the instance built in the
            // constructor is reused rather than allocating a new one per call.
            get { return nr; }
        }

        public ISysObj SysObjParent
        {
            get { return so; }
        }

        /// <summary>
        /// Read-only dictionary view that resolves a name first against the type manager's type
        /// dictionary and then against its entry dictionary.
        /// </summary>
        class GrammarNameResolver : IReadOnlyDictionary<String, ISysObj>
        {
            Grammar g;

            public GrammarNameResolver(Grammar g)
            {
                this.g = g;
            }

            public bool IsReadOnly { get { return true; } }

            public bool ContainsKey(String key)
            {
                ISysObj o;
                return TryGetValue(key, out o);
            }

            /// <summary>Snapshot of the keys produced by the enumerator.</summary>
            public ICollection<String> Keys
            {
                get
                {
                    // Built from the enumerator; reading this.Keys here would recurse without end.
                    ICollection<String> keys = this.Select(kvp => kvp.Key).ToList();
                    return new ReadOnlyCollection<String>(keys);
                }
            }

            /// <summary>Snapshot of the values produced by the enumerator.</summary>
            public ICollection<ISysObj> Values
            {
                get
                {
                    // Built from the enumerator; reading this.Values here would recurse without end.
                    ICollection<ISysObj> values = this.Select(kvp => kvp.Value).ToList();
                    return new ReadOnlyCollection<ISysObj>(values);
                }
            }

            /// <exception cref="KeyNotFoundException">No type or entry has the given name.</exception>
            public ISysObj this[String key]
            {
                get
                {
                    ISysObj so;
                    if (!TryGetValue(key, out so))
                        throw new KeyNotFoundException();
                    return so;
                }
            }

            /// <summary>Sum of the type dictionary and entry dictionary counts.</summary>
            public int Count
            {
                get
                {
                    int c = 0;
                    c += g.tm.type_dict.Count;
                    c += g.tm.entry_dict.Count;
                    //c += g.parses.Count;
                    return c;
                }
            }

            /// <summary>Looks the key up among types first, then among entries.</summary>
            public bool TryGetValue(String key, out ISysObj value)
            {
                Type t;
                if (g.tm.type_dict.TryGetValue(key, out t))
                {
                    value = t;
                    return true;
                }

                Entry e;
                if (g.tm.entry_dict.TryGetValue(key, out e))
                {
                    value = e;
                    return true;
                }

                //ParseChart pc = g.parses.FirstOrDefault(c => c.SysObjName == key);
                //if (pc != null)
                //{
                //    value = pc;
                //    return true;
                //}

                value = null;
                return false;
            }

            /// <summary>Yields every type, then every entry, keyed by its SysObjName.</summary>
            public IEnumerator<KeyValuePair<String, ISysObj>> GetEnumerator()
            {
                foreach (Type t in g.tm.type_dict.Values)
                    yield return new KeyValuePair<String, ISysObj>(t.SysObjName, t);

                foreach (Entry e in g.tm.entry_dict.Values)
                    yield return new KeyValuePair<String, ISysObj>(e.SysObjName, e);

                //foreach (ParseChart pc in g.parses)
                //    yield return new KeyValuePair<String, ISysObj>(pc.SysObjName, pc);
            }

            public bool Contains(KeyValuePair<String, ISysObj> item)
            {
                return this.Any(kvp => kvp.Key == item.Key && kvp.Value == item.Value);
            }

            public void CopyTo(KeyValuePair<String, ISysObj>[] array, int arrayIndex)
            {
                foreach (var kvp in this)
                    array[arrayIndex++] = kvp;
            }

            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
            {
                return GetEnumerator();
            }
        }
    }
}