//#define CIL_EMIT
//#define FCTC_STATS
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Reflection.Emit;
using System.Threading;
using miew.BitArray;
using miew.Debugging;
using miew.Enumerable;
using miew.String;
namespace agree
{
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public unsafe partial class TypeMgr
{
public Allocator da;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
///
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public TypeMgr(Grammar g)
{
(this.g = g).tm = this;
this.config = g.config;
da = new Allocator(this);
this.strings = new Strings(this);
this.fcm = new FeatureConfig.Manager(this);
TopType = Type.MakeTopType(this, config.types.top_type);
TopTfs = new BareTfs(TopType);
type_dict.Add(config.types.top_type, TopType);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Contiguously assigned, zero-based type id numbers are used for GLB caching. Sorted topologically because
/// topwards types are expected to be more heavily used, so they should allocate the base of the triangular cache
/// matrix, so as to avoid wasted
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public BareTfs TopTfs;
public const int TopId = 0;
public int StringId;
public Type[] type_arr;
public Dictionary<String, Type> type_dict = new Dictionary<String, Type>(StringComparer.OrdinalIgnoreCase);
public int c_types = -1;
public int c_t_bits = -1;
public int code_size;
int next_glb_num = 1;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public FeatureConfig.Manager fcm;
public FeatureConfig[] fc_by_index = null;
public FeatureConfig[] fc_by_type = null;
public int[][] rgrgfix_by_type;
public int[][] rgrgfix_autotune;
public FeatureInfo[] feat_arr;
public Dictionary<String, FeatureInfo> feat_map = new Dictionary<String, FeatureInfo>(StringComparer.OrdinalIgnoreCase);
public Dictionary<String, Entry> entry_dict = new Dictionary<String, Entry>(StringComparer.OrdinalIgnoreCase);
public IList<Type> AllTypes { get { return type_arr; } }
public IEnumerable<Entry> AllEntries { get { return entry_dict.Values; } }
public IEnumerable<Instance> AllInstances { get { return AllTypes.AsEnumerable<Instance>().Concat(AllEntries); } }
public ICollection<String> Features { get { return feat_map.Keys; } }
public Config config;
public Strings strings;
public Grammar g;
bool f_petrified = false;
/// certain special types
public Type TopType;
public Type StringType;
public Type tt_list_head;
public Type tt_list;
public Type tt_empty;
public Type tt_dlist_list;
public Type tt_dlist_last;
public Type tt_key_daughter;
public StartSymbol[] rgtt_start_symbols;
public Type tt_label;
public Type tt_meta;
/// feature index for certain special features
public int f_ix_list_head;
public int f_ix_list_tail;
public int f_ix_dlist_list;
public int f_ix_dlist_last;
public int ms_expand = 0;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void LoadTypeHierarchyFromTdl(CommandToken tx, GrammarFileSet tdlg)
{
/// Aggregate Type token streams into groups and load the authored type hierarchy from these identifier groups.
tx.TransactionStatus("Loading types.");
LoadTypes(new TokenGrouper(TokenDefinitionGroup.Type.Tdl, tdlg.tdl));
tdlg.tdl = null;
/// Embed the bounded complete partial order in the type graph
tx.TransactionStatus("Computing Greatest Lower Bound closure of the type hierarchy.");
EmbedBcpo();
/// Read all Features and attach them to their introducing type. Validate appropriateness conditions and determine
/// maximal types for all features. Ensure the validity of the type hierarchy according to certain properties of
/// the formalism.
tx.TransactionStatus("Loading features.");
LoadFeatures();
/// Resolve special types: attach special configuration types used in lists, etc.
tx.TransactionStatus("Resolve special types.");
ResolveSpecialTypes();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Aggregate Entry token streams into groups, construct Entries. Release references to the original tokens
/// as soon as they are no longer needed. In the future, we may wish to retain these to enable live
/// editing scenarios.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void LoadEntriesFromTdl(CommandToken tx, GrammarFileSet tdlg)
{
/// 1. basic grammar rule entries
tx.TransactionStatus("Loading grammar rule entries.");
LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.GrammarRule, tdlg.grules));
tdlg.grules = null;
/// 2. lexical rule entries
tx.TransactionStatus("Loading lexical rule entries.");
LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.LexicalRule, tdlg.irules));
tdlg.irules = null;
/// 3. lexical entries
tx.TransactionStatus("Scanning lexical entries.");
LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Lexicon, tdlg.lexicon));
tdlg.lexicon = null;
/// 4. node label entries
tx.TransactionStatus("Loading node label entries.");
LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Label, tdlg.labels));
tdlg.labels = null;
/// 5. start symbol entries
tx.TransactionStatus("Loading start symbol entries.");
LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Start, tdlg.roots));
tdlg.roots = null;
/// Resolve start symbols and some special rule types
ResolveSpecialEntries();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Type GetMaximalTypeForFeatureToken(TdlTok tok)
{
if (tok.t != TdlTok.Type.Identifier)
TdlTokenizer.ErrorExit(tok, "Expected a feature: '{0}'", tok.t.ToString());
FeatureInfo fi;
if (!feat_map.TryGetValue(tok.i_s, out fi))
TdlTokenizer.ErrorExit(tok, "Unknown feature: '{0}'", tok.i_s);
return fi.maximal_type;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Type GetMaximalTypeForFeature(int i_feat)
{
return feat_arr[i_feat].maximal_type;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public int GetFeatureIndex(String f)
{
FeatureInfo fi;
return feat_map.TryGetValue(f, out fi) ? fi.i_feat : -1;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// initialize Entry dictionary with all Entry symbol names
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void LoadEntries(IEnumerable<TokenDefinitionGroup> file_edefs)
{
foreach (TokenDefinitionGroup tdg in file_edefs)
{
if (tdg.f_append)
TdlTokenizer.ErrorExit(tdg.tok_ident, "Cannot append type information in an entry description.");
TdlTok id_tok = tdg.tok_ident;
Type tt_cur = TopType;
foreach (TdlTok tok_par in tdg.conj_par)
{
Type tt_par;
if (!type_dict.TryGetValue(tok_par.i_s, out tt_par))
TdlTokenizer.ErrorExit(tok_par, String.Format("Entry description uses undefined parent type {0}", tok_par.i_s));
int tid_new;
if ((tid_new = GetGlbCheckTopAndIdentity(tt_cur.m_id, tt_par.m_id)) == -1)
TdlTokenizer.ErrorExit(tok_par, String.Format("Parent types {0} specified for entry description {1} do not unify",
tdg.conj_par.Select(t => t.i_s).StringJoin(" "),
tdg.Identifier));
tt_cur = type_arr[tid_new];
}
Entry e;
if (entry_dict.TryGetValue(id_tok.i_s, out e))
TdlTokenizer.ErrorExit(id_tok, String.Format("Symbol {0} is already defined.", id_tok.i_s));
switch (tdg.type)
{
case TokenDefinitionGroup.Type.GrammarRule:
e = new GrammarRule(tt_cur, id_tok.i_s, tdg.m_bfc);
break;
case TokenDefinitionGroup.Type.LexicalRule:
if (tdg.morph_subrules == null || tdg.morph_subrules.Count == 0)
e = new LexicalRule(tt_cur, id_tok.i_s, tdg.m_bfc);
else
e = new MorphologicalRule(tt_cur, id_tok.i_s, tdg.m_bfc, tdg.morph_subrules);
tdg.morph_subrules = null;
break;
case TokenDefinitionGroup.Type.Lexicon:
e = new LexicalEntry(tt_cur, id_tok.i_s, tdg.m_bfc);
break;
case TokenDefinitionGroup.Type.Label:
if (tdg.conj_par.Any(tk => StringComparer.InvariantCultureIgnoreCase.Compare(tk.i_s, tt_label.Name) == 0))
e = new NodeLabelTemplate(tt_cur, id_tok.i_s, tdg.m_bfc);
else if (tdg.conj_par.Any(tk => StringComparer.InvariantCultureIgnoreCase.Compare(tk.i_s, tt_meta.Name) == 0))
e = new NodeMetaTemplate(tt_cur, id_tok.i_s, tdg.m_bfc);
else
{
String msg = String.Format(
"'{0}' must be either a label template derived from type '{1}' or a meta template derived from type '{2}'",
id_tok.i_s,
tt_label.Name,
tt_meta.Name);
TdlTokenizer.ErrorExit(id_tok, msg);
}
break;
case TokenDefinitionGroup.Type.Start:
e = new StartSymbol(tt_cur, id_tok.i_s, tdg.m_bfc);
break;
default:
throw new InvalidOperationException();
}
Debug.Assert(tdg.morph_subrules == null);
entry_dict.Add(id_tok.i_s, e);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Type LookupType(TdlTok tok)
{
String s_type = tok.i_s;
Type tt;
if (!type_dict.TryGetValue(s_type, out tt))
TdlTokenizer.ErrorExit(tok, "Invalid type: '{0}'", s_type);
return tt;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Type AddType(String s_name, List<BaseFeatConstraint> bfc)
{
Type t = new Type(this, s_name, bfc);
type_dict.Add(s_name, t);
return t;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// initialize Type dictionary with all Type names
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void LoadTypes(IEnumerable<TokenDefinitionGroup> file_tdefs)
{
/// Since we make two passes on the enumerable, pre-load the grouping computation if necessary
TokenDefinitionGroup[] rgtdg = file_tdefs as TokenDefinitionGroup[] ?? file_tdefs.ToArray();
foreach (TokenDefinitionGroup tdg in rgtdg)
{
TdlTok id_tok = tdg.tok_ident;
Type tfst;
if (!tdg.f_append)
{
if (tdg.conj_par.Count == 0 && tdg.type != TokenDefinitionGroup.Type.Label)
TdlTokenizer.ErrorExit(id_tok, "Type defines no parents.");
if (type_dict.TryGetValue(id_tok.i_s, out tfst))
TdlTokenizer.ErrorExit(id_tok, String.Format("{0} already defined", id_tok.i_s));
tfst = AddType(id_tok.i_s, tdg.m_bfc);
}
}
// check and build each type's parent types
foreach (TokenDefinitionGroup tdg in rgtdg)
{
if (tdg.f_append)
{
Type tfst_master;
if (!type_dict.TryGetValue(tdg.Identifier, out tfst_master))
TdlTokenizer.ErrorExit(tdg.tok_ident, "Cannot append type information to undefined type.");
tfst_master.SetConstraints(tdg.m_bfc);
}
Type tfst_cur = type_dict[tdg.Identifier];
foreach (TdlTok tok_par in tdg.conj_par)
{
Type tfst;
if (!type_dict.TryGetValue(tok_par.i_s, out tfst))
TdlTokenizer.ErrorExit(tok_par, String.Format("Type uses undefined parent type {0}", tok_par.i_s));
if (tfst_cur == tfst)
TdlTokenizer.ErrorExit(tok_par, String.Format("Type {0} must not refer to itself as a parent", tok_par.i_s));
tfst_cur.AddIParent(tfst);
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// EmbedBcpo
///
/// Embed the type hierarchy in a bounded complete partial order (BCPO) lattice by inserting greatest-
/// lower bound (GLB) types as required.
///
/// References:
/// Hassan Ait-Kaci, Robert Boyer, Patrick Lincoln, Roger Nasr. 1989. "Efficient Implementation of Lattice
/// Operations"
/// Ulrich Callmeier. 2001. "Efficient Parsing with Large-Scale Unification Grammars" p.28-30.
/// Gerald Penn. 2000. "The Algebraic Structure of Attributed Type Signatures" p. 30-34
/// Bernd Kiefer, Hans-Ulrich Krieger, John Carroll, Rob Malouf. "A Bag of Useful Techniques for Efficient
/// and Robust Parsing" p. 475.
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void EmbedBcpo()
{
// retain the number of types prior to adding GLBs
code_size = type_dict.Count;
// check for inheritance from *top* and assign a maximal-depth value to each node
foreach (Type t in type_dict.Values)
t.FindLevel();
// generate Aït-Kaci transitive-closure codes for all user-specified types, in order according to
// the depth values just determined.
code_dict = new Dictionary<BitArr, Type>();
Type[] bitpos2type = new Type[code_size];
int cur_code = code_size - 1;
foreach (Type tt in type_dict.Values.OrderByDescending(e => e.m_level))
{
BitArr code = new BitArr(code_size);
code[cur_code] = true;
foreach (Type tc in tt.Children)
code.OrEq(tc.m_code);
code_dict.Add(tt.m_code = code, tt);
bitpos2type[cur_code] = tt;
cur_code--;
}
/// determine codes for the GLB types which will be needed to close the graph
Type[] rg_tfs = type_dict.Values.Where(t => t.m_flags != Type.Flags.Leaf).ToArray();
while (true)
{
List<Type> added_glbs = new List<Type>();
for (int i = TopId + 1; i < rg_tfs.Length; i++)
{
Type t0 = rg_tfs[i];
BitArr ba0 = t0.m_code;
for (int j = i + 1; j < rg_tfs.Length; j++)
{
Type t1 = rg_tfs[j];
// todo: fast test and with hash
if (ba0.FastTest(t1.m_code))
{
BitArr z = ba0.AndWithHash(t1.m_code);
ushort thislevel = t0.m_level;
if (t1.m_level > thislevel)
thislevel = t1.m_level;
Type glbt;
if (code_dict.TryGetValue(z, out glbt))
{
if (thislevel > glbt.m_level)
glbt.m_level = thislevel;
}
else
{
glbt = AddType("glbtype" + (next_glb_num++), null);
glbt.m_flags |= Type.Flags.GlbType;
glbt.m_code = z;
glbt.m_level = thislevel;
code_dict.Add(z, glbt);
added_glbs.Add(glbt);
}
}
}
}
if (added_glbs.Count == 0)
break;
rg_tfs = added_glbs.ToArray();
}
c_types = code_dict.Count;
if (c_types >= (int)Edge.Flag.LowestFlagValue)
throw new Exception(String.Format("System supports a maximum of {0} types and {1} types were specified.", (int)Edge.Flag.LowestFlagValue, c_types));
c_t_bits = BitArr.HighestOne(c_types) + 1;
/// sort all types including the new GLB types into a certain topological order
type_arr = code_dict
.Values
.OrderBy(e => e.m_level)
.ThenByDescending(e => e.m_code.OnesCount())
.ToArray();
/// create a set of negations for all the codes, for use in step 1.
BitArr[] notcodes = new BitArr[c_types];
for (int i = 0; i < c_types; i++)
notcodes[i] = ~type_arr[i].m_code;
int ops = 0;
List<Type> draft = new List<Type>();
BitArr r = new BitArr(code_size);
for (int i = c_types - 1; i > 0; i--)
{
Type node = type_arr[i];
/// node ids correspond to their index in 'type_arr'. Set it now.
node.m_id = (int)i;
/// Looking only at GLBs, we can directly obtain the transitive closure of the graph by carefully manipulating
/// their parents and children.
if (!node.IsGlb)
continue;
/// 1. This GLB as a parent: add its children
/// 1a. get a list of draft candidates: all descendants of 'node'
/// The order in which these are added is important because it determines the order of testing children
/// in step 1b. It's just a topological order. The list is needed because children may be eliminated from
/// the list before the point at which they'd otherwise be added.
draft.Clear();
for (int j = i + 1; j < c_types; j++)
{
Type leafwards = type_arr[j];
if (!leafwards.m_code.FastTest(notcodes[i]))
{
draft.Add(leafwards);
AddGlb(j, i, (Edge.Flag)leafwards.m_id);
}
}
/// 1b. pick the subset of immediate children from this list
/// While the list is not empty, add the oldest item as a child and then remove all of its descendants from the
/// list.
while (draft.Count > 0)
{
Type new_child = draft[0];
draft.RemoveAt(0);
if (!new_child.IsGlb)
foreach (Type par in new_child.Parents.ToArray())
{
ops++;
if (node.m_code.AndGivesExact(par.m_code))
new_child.RemoveIParent(par);
}
node.AddIChild(new_child);
for (int q = 0; q < draft.Count; q++)
{
ops++;
if (draft[q].m_code.AndGivesExact(new_child.m_code))
draft.RemoveAt(q--);
}
}
/// 2. This GLB as a child: add its parents
/// 2a. get a list of draft candidates between 'node' and *top*
draft.Clear();
BitArr nc = node.m_code;
for (int j = i - 1; j >= 0; j--)
{
Type topwards = type_arr[j];
if ((topwards.m_flags & Instance.Flags.Leaf) == 0)
{
ops++;
if (nc.AndGivesExact(topwards.m_code))
{
draft.Add(topwards);
if (j > 0)
AddGlb(i, j, (Edge.Flag)node.m_id);
}
}
}
/// 2b. pick a minimal set of parents from this list.
/// Each selection allows us to eliminate others others from the list. This effect is maximized by starting
/// with candidates with the fewest additional bits set beyond the required match. This was the reason for
/// the secondary sort on 'arr' above.
r.CopyFrom(nc);
for (int j = 0; j < draft.Count; j++)
{
Type parent = draft[j];
if (parent.m_code.FastTestNotArg(r))
{
r.OrEq(parent.m_code);
for (int k = j + 1; k < draft.Count; k++)
{
ops++;
if (parent.m_code.AndGivesExact(draft[k].m_code))
draft.RemoveAt(k--);
}
// all GLB-to-GLB edges are all added as children, above
if (!parent.IsGlb)
{
foreach (uint bit_pos in nc.OnesPositions())
parent.RemoveIChild(bitpos2type[bit_pos]);
node.AddIParent(parent);
}
}
}
}
for (int i = 1; i < c_types; i++)
{
Type parent = type_arr[i];
if (!parent.IsGlb)
foreach (Type child in parent.AllDescendants)
AddGlb(i, child.m_id, (Edge.Flag)child.m_id);
}
Console.WriteLine("types {0} closed {1} glb {2} ops {3}", code_size, type_arr.Length, type_arr.Length - code_size, ops);
Debug.Assert(type_arr[TopId] == TopType && TopType.m_id == TopId);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void LoadFeatures()
{
/// The uninitialized value (zero) of the HasAnyFeatures flag means the type is unconstrainable.
/// Mark all types below any occurrence of a constraint as non-atomic.
/// One problem with this code is that the tdl, "t := *top* & [ ]." will cause 't' and its descendants
/// to be marked as having features
foreach (Type tt in type_arr)
{
if ((tt.m_flags & (Type.Flags.HasConstraints | Type.Flags.HasAnyFeatures)) == Type.Flags.HasConstraints)
tt.SetHasAnyFeaturesFlagBelow();
}
/// Atomic types are those featureless types without features in their subgraph
foreach (Type tt in type_arr)
{
if ((tt.m_flags & (Type.Flags.HasAnyFeatures | Type.Flags.Atomic)) == 0)
{
HashSet<Type> hs = tt.AllDescendants;
if (!hs.Any(e => e.HasAnyFeatures))
{
tt.m_flags |= Type.Flags.Atomic;
foreach (Type tta in hs)
tta.m_flags |= Type.Flags.Atomic;
}
}
}
// Add features to a maximal introduction table
// References:
// Ann Copestake "The Compleat LKB" 1993, p. 93.
// Carroll, Copestake, Malouf, Oepen - LKB checktypes.lsp
// Ann Copestake "Implementing Typed Feature Structure Grammars" 2002
Dictionary<String, List<Type>> _maximal_types = new Dictionary<String, List<Type>>();
foreach (Type tt in type_arr.Where(e => e.HasAnyFeatures && e.m_bfc != null))
foreach (BaseFeatConstraint bfc in tt.m_bfc)
{
if (bfc[0].t != TdlTok.Type.Identifier)
TdlTokenizer.ErrorExit(bfc[0], "Expected an identifier");
String feat = bfc[0].i_s;
List<Type> l;
if (_maximal_types.TryGetValue(feat, out l))
{
if (!l.Any(e => tt.IsSubtypeOf(e)))
{
bool f_any = false;
for (int i = 0; i < l.Count; i++)
{
if (l[i] == tt || l[i].IsSubtypeOf(tt))
{
if (f_any)
l.RemoveAt(i--);
else
{
l[i] = tt;
f_any = true;
}
}
}
if (!f_any)
l.Add(tt);
}
}
else
{
l = new List<Type>() { tt };
_maximal_types.Add(feat, l);
}
}
// Set the local features. Check for multiple introduction
int i_feat = 0;
foreach (var kvp in _maximal_types)
{
Debug.Assert(kvp.Value.Count != 0, "Internal error in temporary maximal types computation.");
List<Type> lt = kvp.Value;
Type max_type = lt[0];
/// Feature should be introduced by only one type in the type hierarchy
if (lt.Count > 1)
TdlTokenizer.ErrorExit(String.Format("Feature {0} is introduced at multiple types ({1})",
kvp.Key,
lt.Select(tt => tt.Name).StringJoin(", ")));
/// add the feature and associate it with its maximal introducing type
feat_map.Add(kvp.Key, new FeatureInfo(kvp.Key, i_feat, max_type));
max_type.AddLocalFeature(i_feat);
i_feat++;
}
_maximal_types = null;
// construct reverse table which maps the feature index back to the feature info
feat_arr = new FeatureInfo[i_feat];
foreach (var fi in feat_map.Values)
feat_arr[fi.i_feat] = fi;
// Do the following in topological order
for (int i = 0; i < c_types; i++)
type_arr[i].AddNonLocalFeatures();
// freeze feature configurations. will bind FCs to tray later
fc_by_index = fcm.OrderBy(fc => fc.ConfigIndex).ToArray();
fc_by_type = type_arr.Select(t => t.fc).ToArray();
rgrgfix_by_type = rgrgfix_autotune = type_arr.Select(t => t.fc.rg_fix).ToArray();
UnsafeInit(rgrgfix_by_type);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Re-tune the feature unification order based on observed running failure counts
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Autotune()
{
fcm.RetuneAll();
rgrgfix_autotune = type_arr.Select(t => t.fc.rg_fix_autotune).ToArray();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Resolve some static types
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ResolveSpecialTypes()
{
if (!type_dict.TryGetValue(config.types.string_type, out StringType))
//StringType = AddType(config.types.string_type, null);
throw new TdlException("The string type '{0}' was not defined in any TDL file.", config.types.string_type);
this.StringId = strings.string_id = StringType.m_id;
FeatureInfo fi;
if (!feat_map.TryGetValue(config.types.f_list_head, out fi))
throw new TdlException("The list head feature '{0}' was not defined in any TDL file.", config.types.f_list_head);
f_ix_list_head = fi.i_feat;
tt_list_head = fi.maximal_type;
if (!feat_map.TryGetValue(config.types.f_list_tail, out fi))
throw new TdlException("The list tail feature '{0}' was not defined in any TDL file.", config.types.f_list_tail);
f_ix_list_tail = fi.i_feat;
if (!type_dict.TryGetValue(config.types.typs_list, out tt_list))
throw new TdlException("The list type '{0}' was not defined in any TDL file.", config.types.typs_list);
if (!type_dict.TryGetValue(config.types.typs_empty_list, out tt_empty))
throw new TdlException("The empty list type '{0}' was not defined in any TDL file.", config.types.typs_empty_list);
if (!type_dict.TryGetValue(config.types.typs_diff_list, out tt_dlist_list))
throw new TdlException("The difference list type '{0}' was not defined in any TDL file.", config.types.typs_diff_list);
if (!feat_map.TryGetValue(config.types.f_dlist_list, out fi))
throw new TdlException("The difference list 'LIST' feature '{0}' was not defined in any TDL file.", config.types.f_dlist_list);
f_ix_dlist_list = fi.i_feat;
if (!feat_map.TryGetValue(config.types.f_dlist_last, out fi))
throw new TdlException("The difference list 'LAST' feature '{0}' was not defined in any TDL file.", config.types.f_dlist_last);
f_ix_dlist_last = fi.i_feat;
tt_dlist_last = tt_list;
if (!type_dict.TryGetValue(config.grammar.typs_key_daughter, out tt_key_daughter))
{
/// LKB sample grammars do not define the LKB default asserted type '+' for KEY-ARG
Console.Error.WriteLine("warning: The key daughter path type '{0}' was not defined in any TDL file.", config.grammar.typs_key_daughter);
config.grammar.typs_key_daughter = config.types.top_type;
tt_key_daughter = TopType;
//throw new TdlException("The key daughter path type '{0}' was not defined in any TDL file.", config.typs_key_daughter);
}
String s = config.nodeLabels.LabelTemplateType;
if (s != null && !type_dict.TryGetValue(s, out tt_label))
throw new TdlException("The node label template type '{0}' was not defined in any TDL file.", config.nodeLabels.LabelTemplateType);
s = config.nodeLabels.MetaTemplateType;
if (s != null && !type_dict.TryGetValue(s, out tt_meta))
{
//throw new TdlException("The node meta template type '{0}' was not defined in any TDL file.", config._NodeLabelConfiguration.MetaTemplateType);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
///
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void ResolveSpecialEntries()
{
if (config.grammar.start_symbols == null)
config.grammar.start_symbols = new List<String> { "root" };
rgtt_start_symbols = new StartSymbol[config.grammar.start_symbols.Count];
int i = 0;
foreach (String ss in config.grammar.start_symbols)
{
Entry e;
if (!entry_dict.TryGetValue(ss, out e))
{
entry_dict.Add(ss, e = new StartSymbol(TopType, ss, null));
//throw new TdlException("Start symbol type '{0}' was not defined in any TDL file.", ss);
}
if ((rgtt_start_symbols[i] = e as StartSymbol) == null)
{
throw new TdlException("Type '{0}', which is listed as a start symbol, was not defined in the start symbols file.", ss);
}
i++;
}
if (config.parser.span_only_rules != null && config.parser.span_only_rules.Count > 0)
{
foreach (String ss in config.parser.span_only_rules)
{
Entry ent;
if (!entry_dict.TryGetValue(ss, out ent))
throw new TdlException("Span only rule type '{0}' was not defined in any TDL file.", ss);
var gr = ent as GrammarRule;
if (gr == null)
throw new TdlException("Span only rule type '{0}' is not a grammar rule.", ss);
gr.IsSpanOnly = true;
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
///
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public void Petrify()
{
/// calculate how many bits will be used for stringid/typeid
int c_f_bits = 32 - BitArr.HighestOne((int)Edge.Flag.LowestFlagValue);
int c_s_bits = BitArr.HighestOne(strings.Count) + 1;
int c_ts_bits = Math.Max(c_t_bits, c_s_bits);
if ((Edge.Flag)((1 << c_ts_bits) - 1) > Edge.Flag.MultiIdMask)
throw new TdlException("Too many types ({0}) or strings ({1}) defined in the type hierarchy.", c_types, strings.Count);
UpdateGlbCacheBareTypes();
#if FCTC_STATS
var fconfigs = AllTypes
.Select(t => new { type = t, features = t.fc })
.GroupBy(a => a.features, a => a.type)
.ToArray();
Console.WriteLine("{0} types ({1}b), {2} strings ({3}b), {4} feat in {5} configurations (max feat/type {6}). -> edge bits: flags:{7} fconfigs:{8} sid/tid: {9}",
c_types,
c_t_bits,
strings.Count,
c_s_bits,
feat_arr.Length,
fconfigs.Length,
fconfigs.Max(grp => grp.Key.Count),
c_f_bits,
32 - c_f_bits - c_ts_bits,
c_ts_bits);
#endif
f_petrified = true;
}
public bool IsPetrified { get { return f_petrified; } }
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
///
/// </summary>
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if FCTC_STATS
public void TypePatternReport()
{
int node_count = 0;
int distinct_TC = 0;
var nodes = g.loadtray.Pools
.SelectMany(cp => cp.PoolMarks)
.GroupBy(pm => pm._in_mark)
.ToArray();
foreach (var zx in nodes
.Select(grp => new
{
types_used = grp.Select(pm => pm.ConstraintType)
.OrderBy(t => t.m_id)
.Select(t => t.m_id.ToString())
.StringJoin("-"),
fc = fcm.Get(grp.Select(pm => pm.Pool.i_feat))
})
.GroupBy(a => a.fc, a => a.types_used))
{
String[] vz = zx.Distinct().ToArray();
//Console.WriteLine("{0}\t{1} features\t{2} uses\t{3} distinct", zx.Key, zx.Key.Count, zx.Count(), vz.Length);
foreach (var zs in vz.Take(100))
{
// Console.WriteLine("\t{0}", zs);
}
distinct_TC += vz.Length;
node_count += zx.Count();
}
Debug.Assert(node_count == nodes.Length);
int c_allbarenodes = nodes
.Where(grp => grp.All(pm => pm.Constraint.Mark == 0))
.Count();
double avg_eligible_load = nodes
.Where(grp => grp.All(pm => pm.Constraint.Mark == 0))
.Average(grp => grp.Count());
Console.WriteLine("nodes {0} distinct TC {1} {2:00.00}% all-bare nodes {3} {4:00.00}% avg. load {5:00.00}",
node_count,
distinct_TC,
100.0 * distinct_TC / node_count,
c_allbarenodes,
100.0 * c_allbarenodes / node_count,
avg_eligible_load);
//if (Unification.c_all_unif != 0.0)
// Console.WriteLine("unif {0} / {1} {2:00.00}%", Unification.c_skip_unif, Unification.c_all_unif, 100.0 * Unification.c_skip_unif / Unification.c_all_unif);
//if (Unification.c_all_feat != 0.0)
// Console.WriteLine("feat {0} / {1} {2:00.00}%", Unification.c_skip_feat, Unification.c_all_feat, 100.0 * Unification.c_skip_feat / Unification.c_all_feat);
//Unification.c_skip_unif = 0;
//Unification.c_all_unif = 0;
//Unification.c_skip_feat = 0;
//Unification.c_all_feat = 0;
}
#endif
};
}