using System;
using System.Text;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters.Binary;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Collections.ObjectModel;
using System.Threading.Tasks;
using glue.Collections.ReadOnly;
using glue.Extensions.Enumerable;
using glue.Tokenization;
namespace agree
{
public interface IGrammar : ISysObj
{
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// A loaded grammar: its type hierarchy, lexicon, rules, node labeler, and the parser built from them.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public partial class Grammar : IGrammar
{
static readonly byte[] binary_signature = Encoding.ASCII.GetBytes("gee-grammar-binary\x1A");
public TypeMgr tm;
public Lexicon lex;
readonly SysObj so;
public SysObj SysObj { get { return so; } }
readonly String s_name;
public String SysObjName { get { return s_name; } }
String s_description;
public string SysObjDescription { get { return s_description; } }
String s_author;
public string Author { get { return s_author; } }
GrammarNameResolver nr;
public GrammarNodeLabeler nl;
GrammarParser gp;
//internal HashSet<ParseChart> parses = new HashSet<ParseChart>(); // beware of this GC-root
bool f_loaded = false;
internal event Action<CommandToken, Grammar> LoadedEvent = null;
public bool IsLoaded { get { return f_loaded; } }
public Tray loadtray;
public Rule[] _rules;
public GrammarRule[] _grammar_rules;
public LexicalRule[] _lexical_rules;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Grammar constructors
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Grammar(SysObj so, String name)
{
this.nr = new GrammarNameResolver(this);
this.so = so;
this.s_name = name;
}
//public Grammar(CommandToken tx, String name, String filename)
// : this(tx.SystemInstance, name)
//{
// this.Load(tx, filename);
//}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Examine the specified file. If the signature of a binary grammar file is found, load it. Otherwise, assume
/// it's a script file that lists the component configuration and TDL files.
/// </summary>
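/// <example>
/// Illustrative sketch only; the grammar name and script path are hypothetical:
/// <code>
/// Grammar g = new Grammar(tx.SystemInstance, "my-grammar");
/// g.Load(tx, @"grammars\my-grammar\script");   // a saved binary image with a .gee extension also works
/// </code>
/// </example>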
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Load(CommandToken tx, String filename)
{
if (f_loaded)
throw new InvalidOperationException("grammar already loaded");
filename = Path.GetFullPath(filename);
if (!File.Exists(filename) && !File.Exists(filename = Path.ChangeExtension(filename, ".gee")))
{
String msg = String.Format("The file '{0}' could not be found.", filename);
throw new FileNotFoundException(msg, filename);
}
Stopwatch stopw = Stopwatch.StartNew();
tx.TransactionStatus("Begin loading '<span style='color:#008000;'>{0}</span>'.", filename);
if (!TryLoadBinary(tx, filename))
{
GrammarFileSet tdlg = new GrammarFileSet(filename);
this.Load(tx, tdlg);
}
/// Initialize the parsing component for this grammar. This parser will use this Grammar's TypeManager,
/// GrammarRules, Lexicon, and the StartSymbols from tm.AllEntries.
tx.TransactionStatus("Initialize parser.");
//RuntimeNodeLabelConfig rnlc = new RuntimeNodeLabelConfig(tm, tm.config.NodeLabelConfiguration);
nl = new GrammarNodeLabeler(
tm.config.NodeLabelConfiguration,
tm.AllEntries.OfType<NodeLabelTemplate>(),
tm.AllEntries.OfType<NodeMetaTemplate>());
gp = new GrammarParser(tm.config.ParserConfiguration, this);
f_loaded = true;
Action<CommandToken, Grammar> ev = LoadedEvent;   // snapshot so a handler detached on another thread can't race us
if (ev != null)
ev(tx, this);
tx.TransactionStatus("Loaded '<span style='color:#008000;'>{0}</span>' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Load the grammar from a set of token lists (the tokenized configuration and TDL files)
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void Load(CommandToken tx, GrammarFileSet tdlg)
{
s_description = tdlg.description ?? s_name;
s_author = tdlg.author ?? "";
/// Create a type hierarchy
tm = new TypeMgr(this, tdlg.config);
/// Populate type hierarchy from the specified tokens
tm.LoadTypeHierarchyFromTdl(tx, tdlg);
/// Create a default tray for the grammar's canonical TFSs
loadtray = TrayMgr.Allocate<ConcurrentTray>(tm, 1, -1);
tm.config.CompileQuickCheckPaths(loadtray, tdlg.quick_check_paths);
loadtray.CompileGrammarPaths(tm.config);
/// Create and persist definition TFSs for all types
tx.TransactionStatus("Parse type definitions.");
ParseTypeDefinitions(tx);
/// Gather and group the various entry types
tm.LoadEntriesFromTdl(tx, loadtray, tdlg);
/// Create and persist TFSs for authored definitions
tx.TransactionStatus("Parse entry definitions.");
ParseEntryDefinitions(tx);
/// No more strings to add; freeze the type hierarchy
tm.Petrify(loadtray /*temp*/);
#if FCTC_STATS
tm.TypePatternReport();
#endif
// gp initialization needs some of these already expanded. todo: make Task<T>
ExpandTypeDefinitions(tx);
#if FCTC_STATS
tm.TypePatternReport();
#endif
tx.TransactionStatus("Build rule compatibility matrix.");
AnalyzeRuleCompatibility();
Console.Write("scan lexicon...");
/// Create and initialize the lexicon.
lex = new Lexicon(this, tm.AllEntries.OfType<LexicalEntry>(), tdlg.irregs);
Console.WriteLine("done.");
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Cache each rule's daughters and build the compatibility matrix between lexical and grammar rules
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void AnalyzeRuleCompatibility()
{
Console.Write("build rule compatibility matrix...");
_rules = tm.AllEntries.OfType<Rule>().ToArray();
_lexical_rules = _rules.OfType<LexicalRule>().ToArray();
_grammar_rules = _rules.OfType<GrammarRule>().ToArray();
/// Cache rule daughters in each rule and find maximum arity
foreach (Rule r in _rules)
r.InitializeDaughters();
/// Check rule compatibility
foreach (LexicalRule r in _lexical_rules)
r.AnalyzeRuleCompatibility(_rules, null);
foreach (GrammarRule r in _grammar_rules)
r.AnalyzeRuleCompatibility(_grammar_rules, _rules);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Load a grammar that was saved in binary format
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool TryLoadBinary(CommandToken tx, String filename)
{
using (FileStream str = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
using (BinaryReader br = new BinaryReader(str))
{
byte[] sig_check = br.ReadBytes(binary_signature.Length);   // ReadBytes may return fewer bytes than requested
if (sig_check.Length != binary_signature.Length || !sig_check.SequenceEqual(binary_signature))
return false;
BinaryFormatter bf = new BinaryFormatter();
/// read configuration options
GrammarConfig config = (GrammarConfig)bf.Deserialize(br.BaseStream);
/// Create a type hierarchy
tm = new TypeMgr(this, config);
//todo: loadtray has not been assigned yet on this path; it is only recovered from the deserialized types below
config.CompileQuickCheckPaths(loadtray, null);
loadtray.CompileGrammarPaths(config);
/// Load compiled grammar
tm.LoadBinary(tx, br);
f_expanded_all = true;
// temp: recover the tray holding the deserialized TFSs from the first type that has an expanded edge
this.loadtray = tm.AllTypes.Select(t => t.Expanded).First(te => te != default(TfsEdge)).Tray;
/// read lexicon
lex = Lexicon.Load(this, tm.AllEntries.OfType<LexicalEntry>(), br);
}
return true;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Save this grammar in binary format
/// </summary>
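/// <example>
/// Illustrative sketch only; the output path is hypothetical:
/// <code>
/// grammar.Save(tx, @"grammars\my-grammar.gee");
/// </code>
/// </example>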
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Save(CommandToken tx, String filename)
{
Stopwatch stopw = Stopwatch.StartNew();
tx.TransactionStatus("Begin writing binary format '<span style='color:#008000;'>{0}</span>'.", filename);
using (Stream str = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None))
using (BinaryWriter bw = new BinaryWriter(str))
{
/// write grammar signature
bw.Write(binary_signature);
/// write configuration options
BinaryFormatter bf = new BinaryFormatter();
bf.Serialize(bw.BaseStream, tm.config);
/// Expand all types prior to saving
ExpandTypeDefinitions(tx);
/// write type manager
tm.Save(tx, bw);
/// write lexicon
bf.Serialize(bw.BaseStream, lex);
}
tx.TransactionStatus("Wrote '<span style='color:#008000;'>{0}</span>' in {1:N4} s.", filename, stopw.Elapsed.TotalSeconds);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Parser access and reporting of grammar contents
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
public GrammarParser Parser { get { return gp; } }
public String GetInfo()
{
StringBuilder sb = new StringBuilder();
sb.AppendFormat("<b>Information for '{0}':</b><br />", SysObjName);
sb.Append("<br />");
sb.AppendFormat("Number of authored types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.code_size);
sb.AppendFormat("Number of GLB types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.type_arr.Length - tm.code_size);
sb.AppendFormat("Total types: <span style='color:#008000;'>{0:#,#}</span><br />", tm.type_arr.Length);
sb.Append("<br />");
sb.AppendFormat("Number of grammar rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is GrammarRule));
sb.AppendFormat("Number of lexical rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e.GetType() == typeof(LexicalRule)));
sb.AppendFormat("Number of inflection rules: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is MorphologicalRule));
sb.AppendFormat("Number of start symbols: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is StartSymbol));
sb.AppendFormat("Number of node labels: <span style='color:#008000;'>{0:#,#}</span><br />", this.tm.AllEntries.Count(e => e is NodeLabel));
//sb.AppendFormat("Number of lexicon entries: <span style='color:#008000;'>{0:#,#}</span><br />", this.lex.Count);
sb.Append("<br />");
sb.AppendFormat("Number of edges: <span style='color:#008000;'>{0:#,#}</span><br />", loadtray.PoolMarkCount);
return sb.ToString();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Create and persist the definition TFS for every type, then protect the load tray
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ParseTypeDefinitions(CommandToken tx)
{
tx.TransactionStatus("Loading definitions for all Types");
#if false
Parallel.ForEach(tm.AllTypes, t => t.LoadDefinition(loadtray));
#else
foreach (Instance t in tm.AllTypes)
{
t.LoadDefinition(loadtray);
}
#endif
loadtray.Protect();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Create and persist the definition TFS for every authored entry, then protect the load tray
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ParseEntryDefinitions(CommandToken tx)
{
tx.TransactionStatus("Loading definitions for all Entries");
#if false
Parallel.ForEach(tm.AllEntries, t => t.LoadDefinition(loadtray));
#else
foreach (Instance t in tm.AllEntries)
t.LoadDefinition(loadtray);
#endif
loadtray.Protect();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Expand all type definitions and freeze the load tray once expansion is complete
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public bool f_expanded_all = false;
public void ExpandTypeDefinitions(CommandToken tx)
{
tx.TransactionStatus("Expanding all type definitions.");
if (f_expanded_all)
return;
#if CONCURRENT
Parallel.ForEach(tm.AllTypes, t =>
{
TfsEdge e = t.Expanded;
});
#else
TfsEdge e;
foreach (Type t in tm.AllTypes)
{
e = t.Expanded;
}
#endif
loadtray.Protect();
if (loadtray is IFreezableTray)
((IFreezableTray)loadtray).Freeze();
f_expanded_all = true;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Parse an input sentence, token set, or pre-tokenized chart input, returning a task for the resulting chart
/// </summary>
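/// <example>
/// Illustrative sketch only; assumes the grammar has already been loaded:
/// <code>
/// var task = grammar.Parse("the dog barks");
/// ParseChart chart = task.Result;   // null when no parser is available
/// </code>
/// </example>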
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Task<ParseChart> Parse(String sent)
{
if (gp == null)
{
var tcs = new TaskCompletionSource<ParseChart>();
tcs.SetResult(null);
return tcs.Task;
}
return gp.Parse(sent);
}
public Task<ParseChart> Parse(TokenSet ts)
{
return gp.Parse(ts);
}
public Task<ParseChart> Parse(String source_text, int chart_size, IEnumerable<ParseChart.IParseChartToken> tokens)
{
return gp.Parse(source_text, chart_size, tokens, null);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Collect the pool marks reachable from the definition and expanded TFSs of all loaded instances
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public HashSet<PoolMark> Reachable()
{
HashSet<PoolMark> seen = new HashSet<PoolMark>();
foreach (Instance t in tm.AllInstances)
{
if (t.m_flags.HasFlag(Type.Flags.LoadedDefinition))
{
seen.UnionWith(t.Definition.PoolMarksBelow);
if (t.m_flags.HasFlag(Type.Flags.Expanded) && !t.Expanded.Equals(t.Definition))
seen.UnionWith(t.Expanded.PoolMarksBelow);
}
}
return seen;
}
public IReadOnlyDictionary<String, ISysObj> SysObjChildren
{
get { return nr; }
}
public ISysObj SysObjParent
{
get { return so; }
}
class GrammarNameResolver : IReadOnlyDictionary<String, ISysObj>
{
Grammar g;
public GrammarNameResolver(Grammar g)
{
this.g = g;
}
public bool IsReadOnly { get { return true; } }
public bool ContainsKey(String key)
{
ISysObj o;
return TryGetValue(key, out o);
}
public ICollection<String> Keys
{
get { return new ReadOnlyCollection<String>(this.Select(kvp => kvp.Key).ToArray()); }
}
public ICollection<ISysObj> Values
{
get { return new ReadOnlyCollection<ISysObj>(this.Select(kvp => kvp.Value).ToArray()); }
}
public ISysObj this[String key]
{
get
{
ISysObj so;
if (!TryGetValue(key, out so))
throw new KeyNotFoundException();
return so;
}
}
public int Count
{
get
{
int c = 0;
c += g.tm.type_dict.Count;
c += g.tm.entry_dict.Count;
//c += g.parses.Count;
return c;
}
}
public bool TryGetValue(String key, out ISysObj value)
{
Type t;
if (g.tm.type_dict.TryGetValue(key, out t))
{
value = t;
return true;
}
Entry e;
if (g.tm.entry_dict.TryGetValue(key, out e))
{
value = e;
return true;
}
//ParseChart pc = g.parses.FirstOrDefault(c => c.SysObjName == key);
//if (pc != null)
//{
// value = pc;
// return true;
//}
value = null;
return false;
}
public IEnumerator<KeyValuePair<String, ISysObj>> GetEnumerator()
{
foreach (Type t in g.tm.type_dict.Values)
yield return new KeyValuePair<String, ISysObj>(t.SysObjName, t);
foreach (Entry e in g.tm.entry_dict.Values)
yield return new KeyValuePair<String, ISysObj>(e.SysObjName, e);
//foreach (ParseChart pc in g.parses)
// yield return new KeyValuePair<String, ISysObj>(pc.SysObjName, pc);
}
public bool Contains(KeyValuePair<String, ISysObj> item)
{
return this.Any(kvp => kvp.Key == item.Key && kvp.Value == item.Value);
}
public void CopyTo(KeyValuePair<String, ISysObj>[] array, int arrayIndex)
{
foreach (var kvp in this)
array[arrayIndex++] = kvp;
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
};
};
}