//#define CIL_EMIT //#define FCTC_STATS using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Reflection; using System.Reflection.Emit; using System.Threading; using miew.BitArray; using miew.Debugging; using miew.Enumerable; using miew.String; namespace agree { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public unsafe partial class TypeMgr { public Allocator da; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public TypeMgr(Grammar g) { (this.g = g).tm = this; this.config = g.config; da = new Allocator(this); this.strings = new Strings(this); this.fcm = new FeatureConfig.Manager(this); TopType = Type.MakeTopType(this, config.types.top_type); TopTfs = new BareTfs(TopType); type_dict.Add(config.types.top_type, TopType); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Contiguously assigned, zero-based type id numbers are used for GLB caching. Sorted topologically because /// topwards types are expected to be more heavily used, so they should allocate the base of the triangular cache /// matrix, so as to avoid wasted /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public BareTfs TopTfs; public const int TopId = 0; public int StringId; public Type[] type_arr; public Dictionary<String, Type> type_dict = new Dictionary<String, Type>(StringComparer.OrdinalIgnoreCase); public int c_types = -1; public int c_t_bits = -1; public int code_size; int next_glb_num = 1; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public FeatureConfig.Manager fcm; public FeatureConfig[] fc_by_index = null; public FeatureConfig[] fc_by_type = null; public int[][] rgrgfix_by_type; public int[][] rgrgfix_autotune; public FeatureInfo[] feat_arr; public Dictionary<String, FeatureInfo> feat_map = new Dictionary<String, FeatureInfo>(StringComparer.OrdinalIgnoreCase); public Dictionary<String, Entry> entry_dict = new Dictionary<String, Entry>(StringComparer.OrdinalIgnoreCase); public IList<Type> AllTypes { get { return type_arr; } } public IEnumerable<Entry> AllEntries { get { return entry_dict.Values; } } public IEnumerable<Instance> AllInstances { get { return AllTypes.AsEnumerable<Instance>().Concat(AllEntries); } } public ICollection<String> Features { get { return feat_map.Keys; } } public Config config; public Strings strings; public Grammar g; bool f_petrified = false; /// certain special types public Type TopType; public Type StringType; public Type tt_list_head; public Type tt_list; public Type tt_empty; public Type tt_dlist_list; public Type tt_dlist_last; public Type tt_key_daughter; public StartSymbol[] rgtt_start_symbols; public Type tt_label; public Type tt_meta; /// feature index for certain special features public int f_ix_list_head; public int f_ix_list_tail; public int f_ix_dlist_list; public int f_ix_dlist_last; public int ms_expand = 0; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void LoadTypeHierarchyFromTdl(CommandToken tx, GrammarFileSet tdlg) { /// Aggregate Type token streams into groups and load the authored type hierarchy from these identifier groups. tx.TransactionStatus("Loading types."); LoadTypes(new TokenGrouper(TokenDefinitionGroup.Type.Tdl, tdlg.tdl)); tdlg.tdl = null; /// Embed the bounded complete partial order in the type graph tx.TransactionStatus("Computing Greatest Lower Bound closure of the type hierarchy."); EmbedBcpo(); /// Read all Features and attach them to their introducing type. Validate appropriateness conditions and determine /// maximal types for all features. Ensure the validity of the type hierarchy according to certain properties of /// the formalism. tx.TransactionStatus("Loading features."); LoadFeatures(); /// Resolve special types: attach special configuration types used in lists, etc. tx.TransactionStatus("Resolve special types."); ResolveSpecialTypes(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Aggregate Entry token streams into groups, construct Entries. Release references to the original tokens /// as soon as they are no longer needed. In the future, we may wish to retain these to enable live /// editing scenarios. /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void LoadEntriesFromTdl(CommandToken tx, GrammarFileSet tdlg) { /// 1. basic grammar rule entries tx.TransactionStatus("Loading grammar rule entries."); LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.GrammarRule, tdlg.grules)); tdlg.grules = null; /// 2. lexical rule entries tx.TransactionStatus("Loading lexical rule entries."); LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.LexicalRule, tdlg.irules)); tdlg.irules = null; /// 3. lexical entries tx.TransactionStatus("Scanning lexical entries."); LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Lexicon, tdlg.lexicon)); tdlg.lexicon = null; /// 4. node label entries tx.TransactionStatus("Loading node label entries."); LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Label, tdlg.labels)); tdlg.labels = null; /// 5. start symbol entries tx.TransactionStatus("Loading start symbol entries."); LoadEntries(new TokenGrouper(TokenDefinitionGroup.Type.Start, tdlg.roots)); tdlg.roots = null; /// Resolve start symbols and some special rule types ResolveSpecialEntries(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Type GetMaximalTypeForFeatureToken(TdlTok tok) { if (tok.t != TdlTok.Type.Identifier) TdlTokenizer.ErrorExit(tok, "Expected a feature: '{0}'", tok.t.ToString()); FeatureInfo fi; if (!feat_map.TryGetValue(tok.i_s, out fi)) TdlTokenizer.ErrorExit(tok, "Unknown feature: '{0}'", tok.i_s); return fi.maximal_type; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Type GetMaximalTypeForFeature(int i_feat) { return feat_arr[i_feat].maximal_type; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public int GetFeatureIndex(String f) { FeatureInfo fi; return feat_map.TryGetValue(f, out fi) ? fi.i_feat : -1; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// initialize Entry dictionary with all Entry symbol names /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void LoadEntries(IEnumerable<TokenDefinitionGroup> file_edefs) { foreach (TokenDefinitionGroup tdg in file_edefs) { if (tdg.f_append) TdlTokenizer.ErrorExit(tdg.tok_ident, "Cannot append type information in an entry description."); TdlTok id_tok = tdg.tok_ident; Type tt_cur = TopType; foreach (TdlTok tok_par in tdg.conj_par) { Type tt_par; if (!type_dict.TryGetValue(tok_par.i_s, out tt_par)) TdlTokenizer.ErrorExit(tok_par, String.Format("Entry description uses undefined parent type {0}", tok_par.i_s)); int tid_new; if ((tid_new = GetGlbCheckTopAndIdentity(tt_cur.m_id, tt_par.m_id)) == -1) TdlTokenizer.ErrorExit(tok_par, String.Format("Parent types {0} specified for entry description {1} do not unify", tdg.conj_par.Select(t => t.i_s).StringJoin(" "), tdg.Identifier)); tt_cur = type_arr[tid_new]; } Entry e; if (entry_dict.TryGetValue(id_tok.i_s, out e)) TdlTokenizer.ErrorExit(id_tok, String.Format("Symbol {0} is already defined.", id_tok.i_s)); switch (tdg.type) { case TokenDefinitionGroup.Type.GrammarRule: e = new GrammarRule(tt_cur, id_tok.i_s, tdg.m_bfc); break; case TokenDefinitionGroup.Type.LexicalRule: if (tdg.morph_subrules == null || tdg.morph_subrules.Count == 0) e = new LexicalRule(tt_cur, id_tok.i_s, tdg.m_bfc); else e = new MorphologicalRule(tt_cur, id_tok.i_s, tdg.m_bfc, tdg.morph_subrules); tdg.morph_subrules = null; break; case TokenDefinitionGroup.Type.Lexicon: e = new LexicalEntry(tt_cur, id_tok.i_s, tdg.m_bfc); break; case TokenDefinitionGroup.Type.Label: if (tdg.conj_par.Any(tk => StringComparer.InvariantCultureIgnoreCase.Compare(tk.i_s, tt_label.Name) == 0)) e = new NodeLabelTemplate(tt_cur, id_tok.i_s, tdg.m_bfc); else if (tdg.conj_par.Any(tk => StringComparer.InvariantCultureIgnoreCase.Compare(tk.i_s, tt_meta.Name) == 0)) e = new NodeMetaTemplate(tt_cur, id_tok.i_s, tdg.m_bfc); else { String msg = String.Format( "'{0}' must be either a label template derived from type '{1}' or a meta template derived from type '{2}'", id_tok.i_s, tt_label.Name, tt_meta.Name); TdlTokenizer.ErrorExit(id_tok, msg); } break; case TokenDefinitionGroup.Type.Start: e = new StartSymbol(tt_cur, id_tok.i_s, tdg.m_bfc); break; default: throw new InvalidOperationException(); } Debug.Assert(tdg.morph_subrules == null); entry_dict.Add(id_tok.i_s, e); } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Type LookupType(TdlTok tok) { String s_type = tok.i_s; Type tt; if (!type_dict.TryGetValue(s_type, out tt)) TdlTokenizer.ErrorExit(tok, "Invalid type: '{0}'", s_type); return tt; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Type AddType(String s_name, List<BaseFeatConstraint> bfc) { Type t = new Type(this, s_name, bfc); type_dict.Add(s_name, t); return t; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// initialize Type dictionary with all Type names /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void LoadTypes(IEnumerable<TokenDefinitionGroup> file_tdefs) { /// Since we make two passes on the enumerable, pre-load the grouping computation if necessary TokenDefinitionGroup[] rgtdg = file_tdefs as TokenDefinitionGroup[] ?? file_tdefs.ToArray(); foreach (TokenDefinitionGroup tdg in rgtdg) { TdlTok id_tok = tdg.tok_ident; Type tfst; if (!tdg.f_append) { if (tdg.conj_par.Count == 0 && tdg.type != TokenDefinitionGroup.Type.Label) TdlTokenizer.ErrorExit(id_tok, "Type defines no parents."); if (type_dict.TryGetValue(id_tok.i_s, out tfst)) TdlTokenizer.ErrorExit(id_tok, String.Format("{0} already defined", id_tok.i_s)); tfst = AddType(id_tok.i_s, tdg.m_bfc); } } // check and build each type's parent types foreach (TokenDefinitionGroup tdg in rgtdg) { if (tdg.f_append) { Type tfst_master; if (!type_dict.TryGetValue(tdg.Identifier, out tfst_master)) TdlTokenizer.ErrorExit(tdg.tok_ident, "Cannot append type information to undefined type."); tfst_master.SetConstraints(tdg.m_bfc); } Type tfst_cur = type_dict[tdg.Identifier]; foreach (TdlTok tok_par in tdg.conj_par) { Type tfst; if (!type_dict.TryGetValue(tok_par.i_s, out tfst)) TdlTokenizer.ErrorExit(tok_par, String.Format("Type uses undefined parent type {0}", tok_par.i_s)); if (tfst_cur == tfst) TdlTokenizer.ErrorExit(tok_par, String.Format("Type {0} must not refer to itself as a parent", tok_par.i_s)); tfst_cur.AddIParent(tfst); } } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// EmbedBcpo /// /// Embed the type hierarchy in a bounded complete partial order (BCPO) lattice by inserting greatest- /// lower bound (GLB) types as required. /// /// References: /// Hassan Ait-Kaci, Robert Boyer, Patrick Lincoln, Roger Nasr. 1989. "Efficient Implementation of Lattice /// Operations" /// Ulrich Callmeier. 2001. "Efficient Parsing with Large-Scale Unification Grammars" p.28-30. /// Gerald Penn. 2000. "The Algebraic Structure of Attributed Type Signatures" p. 30-34 /// Bernd Kiefer, Hans-Ulrich Krieger, John Carroll, Rob Malouf. "A Bag of Useful Techniques for Efficient /// and Robust Parsing" p. 475. /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void EmbedBcpo() { // retain the number of types prior to adding GLBs code_size = type_dict.Count; // check for inheritance from *top* and assign a maximal-depth value to each node foreach (Type t in type_dict.Values) t.FindLevel(); // generate Aït-Kaci transitive-closure codes for all user-specified types, in order according to // the depth values just determined. code_dict = new Dictionary<BitArr, Type>(); Type[] bitpos2type = new Type[code_size]; int cur_code = code_size - 1; foreach (Type tt in type_dict.Values.OrderByDescending(e => e.m_level)) { BitArr code = new BitArr(code_size); code[cur_code] = true; foreach (Type tc in tt.Children) code.OrEq(tc.m_code); code_dict.Add(tt.m_code = code, tt); bitpos2type[cur_code] = tt; cur_code--; } /// determine codes for the GLB types which will be needed to close the graph Type[] rg_tfs = type_dict.Values.Where(t => t.m_flags != Type.Flags.Leaf).ToArray(); while (true) { List<Type> added_glbs = new List<Type>(); for (int i = TopId + 1; i < rg_tfs.Length; i++) { Type t0 = rg_tfs[i]; BitArr ba0 = t0.m_code; for (int j = i + 1; j < rg_tfs.Length; j++) { Type t1 = rg_tfs[j]; // todo: fast test and with hash if (ba0.FastTest(t1.m_code)) { BitArr z = ba0.AndWithHash(t1.m_code); ushort thislevel = t0.m_level; if (t1.m_level > thislevel) thislevel = t1.m_level; Type glbt; if (code_dict.TryGetValue(z, out glbt)) { if (thislevel > glbt.m_level) glbt.m_level = thislevel; } else { glbt = AddType("glbtype" + (next_glb_num++), null); glbt.m_flags |= Type.Flags.GlbType; glbt.m_code = z; glbt.m_level = thislevel; code_dict.Add(z, glbt); added_glbs.Add(glbt); } } } } if (added_glbs.Count == 0) break; rg_tfs = added_glbs.ToArray(); } c_types = code_dict.Count; if (c_types >= (int)Edge.Flag.LowestFlagValue) throw new Exception(String.Format("System supports a maximum of {0} types and {1} types were specified.", (int)Edge.Flag.LowestFlagValue, c_types)); c_t_bits = BitArr.HighestOne(c_types) + 1; /// sort all types including the new GLB types into a certain topological order type_arr = code_dict .Values .OrderBy(e => e.m_level) .ThenByDescending(e => e.m_code.OnesCount()) .ToArray(); /// create a set of negations for all the codes, for use in step 1. BitArr[] notcodes = new BitArr[c_types]; for (int i = 0; i < c_types; i++) notcodes[i] = ~type_arr[i].m_code; int ops = 0; List<Type> draft = new List<Type>(); BitArr r = new BitArr(code_size); for (int i = c_types - 1; i > 0; i--) { Type node = type_arr[i]; /// node ids correspond to their index in 'type_arr'. Set it now. node.m_id = (int)i; /// Looking only at GLBs, we can directly obtain the transitive closure of the graph by carefully manipulating /// their parents and children. if (!node.IsGlb) continue; /// 1. This GLB as a parent: add its children /// 1a. get a list of draft candidates: all descendants of 'node' /// The order in which these are added is important because it determines the order of testing children /// in step 1b. It's just a topological order. The list is needed because children may be eliminated from /// the list before the point at which they'd otherwise be added. draft.Clear(); for (int j = i + 1; j < c_types; j++) { Type leafwards = type_arr[j]; if (!leafwards.m_code.FastTest(notcodes[i])) { draft.Add(leafwards); AddGlb(j, i, (Edge.Flag)leafwards.m_id); } } /// 1b. pick the subset of immediate children from this list /// While the list is not empty, add the oldest item as a child and then remove all of its descendants from the /// list. while (draft.Count > 0) { Type new_child = draft[0]; draft.RemoveAt(0); if (!new_child.IsGlb) foreach (Type par in new_child.Parents.ToArray()) { ops++; if (node.m_code.AndGivesExact(par.m_code)) new_child.RemoveIParent(par); } node.AddIChild(new_child); for (int q = 0; q < draft.Count; q++) { ops++; if (draft[q].m_code.AndGivesExact(new_child.m_code)) draft.RemoveAt(q--); } } /// 2. This GLB as a child: add its parents /// 2a. get a list of draft candidates between 'node' and *top* draft.Clear(); BitArr nc = node.m_code; for (int j = i - 1; j >= 0; j--) { Type topwards = type_arr[j]; if ((topwards.m_flags & Instance.Flags.Leaf) == 0) { ops++; if (nc.AndGivesExact(topwards.m_code)) { draft.Add(topwards); if (j > 0) AddGlb(i, j, (Edge.Flag)node.m_id); } } } /// 2b. pick a minimal set of parents from this list. /// Each selection allows us to eliminate others others from the list. This effect is maximized by starting /// with candidates with the fewest additional bits set beyond the required match. This was the reason for /// the secondary sort on 'arr' above. r.CopyFrom(nc); for (int j = 0; j < draft.Count; j++) { Type parent = draft[j]; if (parent.m_code.FastTestNotArg(r)) { r.OrEq(parent.m_code); for (int k = j + 1; k < draft.Count; k++) { ops++; if (parent.m_code.AndGivesExact(draft[k].m_code)) draft.RemoveAt(k--); } // all GLB-to-GLB edges are all added as children, above if (!parent.IsGlb) { foreach (uint bit_pos in nc.OnesPositions()) parent.RemoveIChild(bitpos2type[bit_pos]); node.AddIParent(parent); } } } } for (int i = 1; i < c_types; i++) { Type parent = type_arr[i]; if (!parent.IsGlb) foreach (Type child in parent.AllDescendants) AddGlb(i, child.m_id, (Edge.Flag)child.m_id); } Console.WriteLine("types {0} closed {1} glb {2} ops {3}", code_size, type_arr.Length, type_arr.Length - code_size, ops); Debug.Assert(type_arr[TopId] == TopType && TopType.m_id == TopId); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void LoadFeatures() { /// The uninitialized value (zero) of the HasAnyFeatures flag means the type is unconstrainable. /// Mark all types below any occurrence of a constraint as non-atomic. /// One problem with this code is that the tdl, "t := *top* & [ ]." will cause 't' and its descendants /// to be marked as having features foreach (Type tt in type_arr) { if ((tt.m_flags & (Type.Flags.HasConstraints | Type.Flags.HasAnyFeatures)) == Type.Flags.HasConstraints) tt.SetHasAnyFeaturesFlagBelow(); } /// Atomic types are those featureless types without features in their subgraph foreach (Type tt in type_arr) { if ((tt.m_flags & (Type.Flags.HasAnyFeatures | Type.Flags.Atomic)) == 0) { HashSet<Type> hs = tt.AllDescendants; if (!hs.Any(e => e.HasAnyFeatures)) { tt.m_flags |= Type.Flags.Atomic; foreach (Type tta in hs) tta.m_flags |= Type.Flags.Atomic; } } } // Add features to a maximal introduction table // References: // Ann Copestake "The Compleat LKB" 1993, p. 93. // Carroll, Copestake, Malouf, Oepen - LKB checktypes.lsp // Ann Copestake "Implementing Typed Feature Structure Grammars" 2002 Dictionary<String, List<Type>> _maximal_types = new Dictionary<String, List<Type>>(); foreach (Type tt in type_arr.Where(e => e.HasAnyFeatures && e.m_bfc != null)) foreach (BaseFeatConstraint bfc in tt.m_bfc) { if (bfc[0].t != TdlTok.Type.Identifier) TdlTokenizer.ErrorExit(bfc[0], "Expected an identifier"); String feat = bfc[0].i_s; List<Type> l; if (_maximal_types.TryGetValue(feat, out l)) { if (!l.Any(e => tt.IsSubtypeOf(e))) { bool f_any = false; for (int i = 0; i < l.Count; i++) { if (l[i] == tt || l[i].IsSubtypeOf(tt)) { if (f_any) l.RemoveAt(i--); else { l[i] = tt; f_any = true; } } } if (!f_any) l.Add(tt); } } else { l = new List<Type>() { tt }; _maximal_types.Add(feat, l); } } // Set the local features. Check for multiple introduction int i_feat = 0; foreach (var kvp in _maximal_types) { Debug.Assert(kvp.Value.Count != 0, "Internal error in temporary maximal types computation."); List<Type> lt = kvp.Value; Type max_type = lt[0]; /// Feature should be introduced by only one type in the type hierarchy if (lt.Count > 1) TdlTokenizer.ErrorExit(String.Format("Feature {0} is introduced at multiple types ({1})", kvp.Key, lt.Select(tt => tt.Name).StringJoin(", "))); /// add the feature and associate it with its maximal introducing type feat_map.Add(kvp.Key, new FeatureInfo(kvp.Key, i_feat, max_type)); max_type.AddLocalFeature(i_feat); i_feat++; } _maximal_types = null; // construct reverse table which maps the feature index back to the feature info feat_arr = new FeatureInfo[i_feat]; foreach (var fi in feat_map.Values) feat_arr[fi.i_feat] = fi; // Do the following in topological order for (int i = 0; i < c_types; i++) type_arr[i].AddNonLocalFeatures(); // freeze feature configurations. will bind FCs to tray later fc_by_index = fcm.OrderBy(fc => fc.ConfigIndex).ToArray(); fc_by_type = type_arr.Select(t => t.fc).ToArray(); rgrgfix_by_type = rgrgfix_autotune = type_arr.Select(t => t.fc.rg_fix).ToArray(); UnsafeInit(rgrgfix_by_type); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Re-tune the feature unification order based on observed running failure counts /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void Autotune() { fcm.RetuneAll(); rgrgfix_autotune = type_arr.Select(t => t.fc.rg_fix_autotune).ToArray(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Resolve some static types /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void ResolveSpecialTypes() { if (!type_dict.TryGetValue(config.types.string_type, out StringType)) //StringType = AddType(config.types.string_type, null); throw new TdlException("The string type '{0}' was not defined in any TDL file.", config.types.string_type); this.StringId = strings.string_id = StringType.m_id; FeatureInfo fi; if (!feat_map.TryGetValue(config.types.f_list_head, out fi)) throw new TdlException("The list head feature '{0}' was not defined in any TDL file.", config.types.f_list_head); f_ix_list_head = fi.i_feat; tt_list_head = fi.maximal_type; if (!feat_map.TryGetValue(config.types.f_list_tail, out fi)) throw new TdlException("The list tail feature '{0}' was not defined in any TDL file.", config.types.f_list_tail); f_ix_list_tail = fi.i_feat; if (!type_dict.TryGetValue(config.types.typs_list, out tt_list)) throw new TdlException("The list type '{0}' was not defined in any TDL file.", config.types.typs_list); if (!type_dict.TryGetValue(config.types.typs_empty_list, out tt_empty)) throw new TdlException("The empty list type '{0}' was not defined in any TDL file.", config.types.typs_empty_list); if (!type_dict.TryGetValue(config.types.typs_diff_list, out tt_dlist_list)) throw new TdlException("The difference list type '{0}' was not defined in any TDL file.", config.types.typs_diff_list); if (!feat_map.TryGetValue(config.types.f_dlist_list, out fi)) throw new TdlException("The difference list 'LIST' feature '{0}' was not defined in any TDL file.", config.types.f_dlist_list); f_ix_dlist_list = fi.i_feat; if (!feat_map.TryGetValue(config.types.f_dlist_last, out fi)) throw new TdlException("The difference list 'LAST' feature '{0}' was not defined in any TDL file.", config.types.f_dlist_last); f_ix_dlist_last = fi.i_feat; tt_dlist_last = tt_list; if (!type_dict.TryGetValue(config.grammar.typs_key_daughter, out tt_key_daughter)) { /// LKB sample grammars do not define the LKB default asserted type '+' for KEY-ARG Console.Error.WriteLine("warning: The key daughter path type '{0}' was not defined in any TDL file.", config.grammar.typs_key_daughter); config.grammar.typs_key_daughter = config.types.top_type; tt_key_daughter = TopType; //throw new TdlException("The key daughter path type '{0}' was not defined in any TDL file.", config.typs_key_daughter); } String s = config.nodeLabels.LabelTemplateType; if (s != null && !type_dict.TryGetValue(s, out tt_label)) throw new TdlException("The node label template type '{0}' was not defined in any TDL file.", config.nodeLabels.LabelTemplateType); s = config.nodeLabels.MetaTemplateType; if (s != null && !type_dict.TryGetValue(s, out tt_meta)) { //throw new TdlException("The node meta template type '{0}' was not defined in any TDL file.", config._NodeLabelConfiguration.MetaTemplateType); } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void ResolveSpecialEntries() { if (config.grammar.start_symbols == null) config.grammar.start_symbols = new List<String> { "root" }; rgtt_start_symbols = new StartSymbol[config.grammar.start_symbols.Count]; int i = 0; foreach (String ss in config.grammar.start_symbols) { Entry e; if (!entry_dict.TryGetValue(ss, out e)) { entry_dict.Add(ss, e = new StartSymbol(TopType, ss, null)); //throw new TdlException("Start symbol type '{0}' was not defined in any TDL file.", ss); } if ((rgtt_start_symbols[i] = e as StartSymbol) == null) { throw new TdlException("Type '{0}', which is listed as a start symbol, was not defined in the start symbols file.", ss); } i++; } if (config.parser.span_only_rules != null && config.parser.span_only_rules.Count > 0) { foreach (String ss in config.parser.span_only_rules) { Entry ent; if (!entry_dict.TryGetValue(ss, out ent)) throw new TdlException("Span only rule type '{0}' was not defined in any TDL file.", ss); var gr = ent as GrammarRule; if (gr == null) throw new TdlException("Span only rule type '{0}' is not a grammar rule.", ss); gr.IsSpanOnly = true; } } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void Petrify() { /// calculate how many bits will be used for stringid/typeid int c_f_bits = 32 - BitArr.HighestOne((int)Edge.Flag.LowestFlagValue); int c_s_bits = BitArr.HighestOne(strings.Count) + 1; int c_ts_bits = Math.Max(c_t_bits, c_s_bits); if ((Edge.Flag)((1 << c_ts_bits) - 1) > Edge.Flag.MultiIdMask) throw new TdlException("Too many types ({0}) or strings ({1}) defined in the type hierarchy.", c_types, strings.Count); UpdateGlbCacheBareTypes(); #if FCTC_STATS var fconfigs = AllTypes .Select(t => new { type = t, features = t.fc }) .GroupBy(a => a.features, a => a.type) .ToArray(); Console.WriteLine("{0} types ({1}b), {2} strings ({3}b), {4} feat in {5} configurations (max feat/type {6}). -> edge bits: flags:{7} fconfigs:{8} sid/tid: {9}", c_types, c_t_bits, strings.Count, c_s_bits, feat_arr.Length, fconfigs.Length, fconfigs.Max(grp => grp.Key.Count), c_f_bits, 32 - c_f_bits - c_ts_bits, c_ts_bits); #endif f_petrified = true; } public bool IsPetrified { get { return f_petrified; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #if FCTC_STATS public void TypePatternReport() { int node_count = 0; int distinct_TC = 0; var nodes = g.loadtray.Pools .SelectMany(cp => cp.PoolMarks) .GroupBy(pm => pm._in_mark) .ToArray(); foreach (var zx in nodes .Select(grp => new { types_used = grp.Select(pm => pm.ConstraintType) .OrderBy(t => t.m_id) .Select(t => t.m_id.ToString()) .StringJoin("-"), fc = fcm.Get(grp.Select(pm => pm.Pool.i_feat)) }) .GroupBy(a => a.fc, a => a.types_used)) { String[] vz = zx.Distinct().ToArray(); //Console.WriteLine("{0}\t{1} features\t{2} uses\t{3} distinct", zx.Key, zx.Key.Count, zx.Count(), vz.Length); foreach (var zs in vz.Take(100)) { // Console.WriteLine("\t{0}", zs); } distinct_TC += vz.Length; node_count += zx.Count(); } Debug.Assert(node_count == nodes.Length); int c_allbarenodes = nodes .Where(grp => grp.All(pm => pm.Constraint.Mark == 0)) .Count(); double avg_eligible_load = nodes .Where(grp => grp.All(pm => pm.Constraint.Mark == 0)) .Average(grp => grp.Count()); Console.WriteLine("nodes {0} distinct TC {1} {2:00.00}% all-bare nodes {3} {4:00.00}% avg. load {5:00.00}", node_count, distinct_TC, 100.0 * distinct_TC / node_count, c_allbarenodes, 100.0 * c_allbarenodes / node_count, avg_eligible_load); //if (Unification.c_all_unif != 0.0) // Console.WriteLine("unif {0} / {1} {2:00.00}%", Unification.c_skip_unif, Unification.c_all_unif, 100.0 * Unification.c_skip_unif / Unification.c_all_unif); //if (Unification.c_all_feat != 0.0) // Console.WriteLine("feat {0} / {1} {2:00.00}%", Unification.c_skip_feat, Unification.c_all_feat, 100.0 * Unification.c_skip_feat / Unification.c_all_feat); //Unification.c_skip_unif = 0; //Unification.c_all_unif = 0; //Unification.c_skip_feat = 0; //Unification.c_all_feat = 0; } #endif }; }