using System.Runtime.Serialization.Formatters.Binary;
using System.Diagnostics;
using System.IO;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using miew.Enumerable;
using miew.Tokenization;
namespace agree
{
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Lookup from the first word of a lexical entry's orthography (<c>Lemmata[0]</c>) to the lexical entries that
/// start with that word. In addition, the lexicon holds:
///  - a lookup from each non-initial word of a multi-word entry to the entry and the word's position in it,
///  - an optional table of irregular forms, keyed by inflected form,
///  - two arrays of lexical rules taken from the grammar.
/// All text keys are compared with <see cref="TextComparer"/>.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
[Serializable]
public class Lexicon : ILookup<String, LexicalEntry>
{
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// One irregular-form record: the stem and the lexical rule associated with an inflected form.
    /// </summary>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public struct Irreg
    {
        public String stem;
        public LexicalRule rule;
    };

    /// <summary>
    /// A non-initial word of a multi-word lexical entry: the entry plus the position (1 or greater) of the
    /// word within the entry's Lemmata.
    /// </summary>
    [DebuggerDisplay("ix: {index} {lex_entry.ToString(),nq}")]
    public struct NonInitialMwe
    {
        public int index;
        public LexicalEntry lex_entry;
    };

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// Build the lexicon from a set of lexical entries and, optionally, a list of irregular forms.
    /// </summary>
    /// <param name="g">Grammar that owns the lexicon; its lexical rules and entry dictionary are read here.</param>
    /// <param name="lex_entries">Lexical entries to index. The sequence is enumerated once by this constructor
    /// and is also retained in the public <c>lex_entries</c> field.</param>
    /// <param name="irregs">Irregular forms, or null if there are none (in which case <c>irreg_dict</c> stays null).</param>
    /// <exception cref="ArgumentNullException"><paramref name="g"/> or <paramref name="lex_entries"/> is null.</exception>
    /// <exception cref="Exception">An irregular form names a rule that is unknown or is not a LexicalRule.</exception>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public Lexicon(Grammar g, IEnumerable<LexicalEntry> lex_entries, List<GrammarFileSet.IrregInfo> irregs)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException further down.
        if (g == null)
            throw new ArgumentNullException("g");
        if (lex_entries == null)
            throw new ArgumentNullException("lex_entries");

        this.g = g;
        this.lex_entries = lex_entries;

#if false
text_comparer = g.tm.config.options.HasFlag(Globals.Options.CaseSensitive) ?
StringComparer.InvariantCulture :
StringComparer.InvariantCultureIgnoreCase;
#else
        text_comparer = StringComparer.InvariantCultureIgnoreCase;
#endif

        // ToLookup enumerates its source immediately, so by the time it returns MultiWordFork has already
        // copied every multi-word entry into 'mw'. The statement order below depends on that.
        List<LexicalEntry> mw = new List<LexicalEntry>();
        lex_lookup = MultiWordFork(lex_entries, mw).ToLookup(lex => lex.Lemmata[0], text_comparer);

        // Index every word after the first; 'ix' is zero-based over the skipped sequence, hence the +1.
        mwe_lookup = mw
            .SelectMany(e => e.Lemmata.Skip(1).Select((w, ix) => new { w, index = ix + 1, lex_entry = e }))
            .ToLookup(a => a.w, a => new NonInitialMwe { index = a.index, lex_entry = a.lex_entry }, text_comparer);

        if (irregs != null)
            irreg_dict = BuildIrregDict(g, irregs, text_comparer);

        CacheLexicalRules();
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    ///
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    [NonSerialized]
    Grammar g;

    public IEnumerable<LexicalEntry> lex_entries;

    // Primary index: first word of the orthography -> entries. Not serialized; Load() rebuilds it.
    [NonSerialized]
    ILookup<String, LexicalEntry> lex_lookup;

    // Secondary index: non-initial word of a multi-word entry -> (position, entry).
    // NOTE(review): this field takes part in serialization, but neither NonInitialMwe nor (as far as I know)
    // System.Linq.Lookup is marked [Serializable] - confirm that serializing a Lexicon actually works.
    public ILookup<String, NonInitialMwe> mwe_lookup;

    // Inflected form -> irregular records. Null when no irregs were supplied, and also after Load(),
    // which has no irregs available to rebuild it from.
    [NonSerialized]
    public Dictionary<String, List<Irreg>> irreg_dict = null;

    // Rules from g._lexical_rules selected with OfType<MorphologicalRule>().
    [NonSerialized]
    public MorphologicalRule[] morph_lexrules;

    // Rules from g._lexical_rules selected with OfExactType<LexicalRule>().
    [NonSerialized]
    public LexicalRule[] non_morph_lexrules;

    public IEqualityComparer<String> text_comparer;

    /// <summary>Comparer used for all text keys of this lexicon.</summary>
    public IEqualityComparer<String> TextComparer { get { return text_comparer; } }

    /// <summary>Grammar this lexicon was built for (or attached to by <see cref="Load"/>).</summary>
    public Grammar Grammar { get { return g; } }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// utility use: echo the input, but add any multi-word lexical entries to the provided list.
    /// This is a lazy iterator: 'mw' is only filled as the returned sequence is enumerated.
    /// </summary>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    IEnumerable<LexicalEntry> MultiWordFork(IEnumerable<LexicalEntry> input, List<LexicalEntry> mw)
    {
        foreach (LexicalEntry le in input)
        {
            yield return le;
            if (le.Lemmata.Count > 1)
                mw.Add(le);
        }
    }

    /// <summary>
    /// Build the irregular-form table: inflected form -> list of (stem, rule) records, in input order.
    /// The table uses the same comparer as the other lookups so that key matching stays consistent.
    /// </summary>
    static Dictionary<String, List<Irreg>> BuildIrregDict(
        Grammar g,
        List<GrammarFileSet.IrregInfo> irregs,
        IEqualityComparer<String> comparer)
    {
        Dictionary<String, List<Irreg>> dict = new Dictionary<String, List<Irreg>>(comparer);
        foreach (var iri in irregs)
        {
            Entry r;
            if (!g.tm.entry_dict.TryGetValue(iri.s_rule, out r))
            {
                String msg = String.Format("Rule '{0}' listed as a rule in the irregs file is not a recognized rule.", iri.s_rule);
                throw new Exception(msg);
            }

            // The named entry exists, but it must also be a LexicalRule to be usable here.
            LexicalRule rule = r as LexicalRule;
            if (rule == null)
            {
                String msg = String.Format("Rule '{0}' listed as a rule in the irregs file is not an inflection rule.", iri.s_rule);
                throw new Exception(msg);
            }

            List<Irreg> forms;
            if (!dict.TryGetValue(iri.inflected, out forms))
                dict.Add(iri.inflected, forms = new List<Irreg>());
            forms.Add(new Irreg { stem = iri.stem, rule = rule });
        }
        return dict;
    }

    /// <summary>
    /// Fill the two rule arrays from the grammar's lexical rules. Requires 'g' to be set.
    /// </summary>
    void CacheLexicalRules()
    {
        non_morph_lexrules = g._lexical_rules.OfExactType<LexicalRule>().ToArray();
        morph_lexrules = g._lexical_rules.OfType<MorphologicalRule>().ToArray();
    }

    /// <summary>
    /// Deserialize a lexicon from the reader's underlying stream and re-attach the state that is not serialized:
    /// the grammar reference, the primary lookup (rebuilt from <paramref name="le"/>) and, when the grammar's
    /// lexical rules are available, the two rule arrays. <c>irreg_dict</c> is not restored and remains null.
    /// </summary>
    /// <param name="g">Grammar to attach. May be null, in which case the rule arrays are left unset.</param>
    /// <param name="le">Lexical entries used to rebuild the primary lookup.</param>
    /// <param name="br">Reader positioned at the serialized lexicon; it is not closed here.</param>
    /// <exception cref="ArgumentNullException"><paramref name="le"/> or <paramref name="br"/> is null.</exception>
    public static Lexicon Load(Grammar g, IEnumerable<LexicalEntry> le, BinaryReader br)
    {
        if (le == null)
            throw new ArgumentNullException("le");
        if (br == null)
            throw new ArgumentNullException("br");

        // SECURITY NOTE(review): BinaryFormatter can execute arbitrary code when fed untrusted data and is
        // obsolete in current .NET. Only load files this application produced itself; consider migrating.
        BinaryFormatter bf = new BinaryFormatter();
        Lexicon l = (Lexicon)bf.Deserialize(br.BaseStream);

        l.lex_lookup = le.ToLookup(lex => lex.Lemmata[0], l.text_comparer);
        l.g = g;

        // The rule arrays are [NonSerialized] and derive only from the grammar, so rebuild them here rather
        // than leaving them null. Skipped when the grammar (or its rule list) is not available yet.
        if (g != null && g._lexical_rules != null)
            l.CacheLexicalRules();

        return l;
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// ILookup(String, LexicalEntry) implementation follows. Every member forwards to the primary lookup.
    /// </summary>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public bool Contains(String key)
    {
        return lex_lookup.Contains(key);
    }

    public int Count
    {
        get { return lex_lookup.Count; }
    }

    public IEnumerable<LexicalEntry> this[String key]
    {
        get { return lex_lookup[key]; }
    }

    public IEnumerator<IGrouping<String, LexicalEntry>> GetEnumerator()
    {
        return lex_lookup.GetEnumerator();
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// A lexical entry together with its orthography (one or more words), which is extracted once from the
/// entry's definition at construction time and cached in <c>words</c>.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
[DebuggerDisplay("{ToString(),nq}")]
public partial class LexicalEntry : DemandExpandEntry
{
    // Cached orthography. After successful construction this is non-null and has at least one element.
    public readonly String[] words = null;

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    /// <summary>
    /// Note: LexicalEntries are not expanded for the purpose of extracting the orthography. It is obtained from the entry
    /// Definition, and thus in this design orthography cannot be unified-in as part of a grammar's type expansion.
    /// </summary>
    /// <exception cref="TfsException">No orthography could be read from the definition at the configured
    /// orthography path, or the orthography list yielded no words.</exception>
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    public LexicalEntry(Type t, String name, List<BaseFeatConstraint> bfc)
        : base(t, name, bfc)
    {
        // Extract the orthography and cache it
        Edge e;
        Tfs _def = BootstrapDefinition();
        if (tm.config.parser.OrthPath.GetEdge(_def, out e))
        {
            // If the edge itself has a string value, the orthography is that single word; otherwise the
            // edge is read as a list and the string values of its elements are collected.
            String s = tm.GetStringValue(e.FlagsId);
            if (s != null)
                words = new String[] { s };
            else
                words = _def.GetListEdges(e).SelectNotNull(le => tm.GetStringValue(le.FlagsId)).ToArray();
        }

        // An empty word list is as unusable as a missing one: consumers index Lemmata[0] unconditionally,
        // so reject it here with a meaningful message instead of failing later with an index error.
        if (words == null || words.Length == 0)
            throw new TfsException("Putative lexical entry '{0}' does not have any orthography at the path '{1}'",
                Name,
                tm.config.grammar.orth_path);
    }

    /// <summary>The words of this entry's orthography, in order (the cached array itself).</summary>
    public IList<String> Lemmata
    {
        get { return words; }
    }

    /// <summary>
    /// Entry name and instance type name, each padded to 20 characters, followed by the words in brackets.
    /// </summary>
    public override string ToString()
    {
        return String.Format("{0} {1} {2}",
            Name.PadRight(20),
            InstanceType.Name.PadRight(20),
            words.Select(w => "[" + w + "]").StringJoin(" "));
    }
};
}