#define MINIFUNCS
//#define CHECK_TOPMOST_FEAT

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Collections;
using System.Runtime.InteropServices;

using miew.Debugging;
using miew.Enumerable;
using miew.ReadOnly;

namespace agree
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// N-way unification (full)
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if DEBUG
	public unsafe partial class UnificationNWay
#else
	public unsafe partial struct UnificationNWay
#endif
	{
		const int CorefsMax = 300;
		const int ArgArityMax = 12;
		const int MaxParticipants = 16;	// (13)
		const int TopmostMark = 1;
		const int DeferredWrite = int.MinValue;

		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		TypeMgr tm;
		ParseControl ctrl;
		ushort*[] rgpfix;
		Tfs[] participants;
#if NODE_INDEX
		NodeIndex** pp_ni_ptx;
		bool f_any_ni;
#endif
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		byte c_participants;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		NwayArgs* _corefs, pa_next;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		NwayArgs** ppmb_next, pp_map_base;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		NwayArgs*** participant_bases;
		arr_tfs_entry** coref_links;
#if CHECK_TOPMOST_FEAT
		ushort c_fix_topmost;
#endif
		public int next_mark;
		public int next_coref_mark;
		public int min_coref_in_mark;
		public arr_tfs_entry* p_ate;
		public arr_tfs_entry* p_ate_base;
		int c_edge_alloc;
		int c_plug_remain;
		PlugPair* plugs;
		PlugPair* plugs_end;
		byte* del_daughter_map;
		bool f_restrict;

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Creates a unifier bound to the given parse control and its type manager, and allocates the participant table.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public UnificationNWay(ParseControl ctrl)
#if !DEBUG
			: this()
#endif
		{
#if DEBUG
			UnificationNWay.unwy = this;
#endif
			this.ctrl = ctrl;
			this.tm = ctrl.tm;
			this.participants = new Tfs[MaxParticipants];
		}
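		// The traversal below stores each (participant, node) argument as one packed int: the participant
		// index in the high 16 bits and the low 16 bits of the node mark beneath it, matching the
		// little-endian layout of TfsixMark. A minimal sketch of that encoding follows, using hypothetical
		// helper names; the real code inlines the arithmetic as ((tfsix << 16) | (ushort)nm) and reads
		// TfsixMark values through pointer casts such as *(int*)&txm_mother.
		static int PackTxnm(short tfsix, short mark) { return (tfsix << 16) | (ushort)mark; }
		static short TxnmTfsix(int txnm) { return (short)(txnm >> 16); }
		static short TxnmMark(int txnm) { return unchecked((short)txnm); }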
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// n-way unification: locates and traverses a unilateral, type-compatible monotonic descent, if one exists,
		/// through any number of internally coreferenced typed feature structures (TFS).
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public bool _n_way(NwayArgs* pnwa_in, int m_dst, bool f_write)
		{
			Debug.Assert((pnwa_in->f & Edge.Flag.EtmNonBareType) != 0);
			Debug.Assert(pnwa_in->c_args > 0);

			int j, k, i_arg, c_args = pnwa_in->c_args;
			TfsixMark* ptm;
#if NODE_INDEX
			NodeIndexEntry** rgnie = stackalloc NodeIndexEntry*[c_args];
			if (f_any_ni)
			{
				for (i_arg = 0; i_arg < c_args; i_arg++)
				{
					ptm = (TfsixMark*)pnwa_in->tmx + i_arg;
					NodeIndex* pni = pp_ni_ptx[ptm->tfsix];
					if (pni != null)
					{
						j = ptm->mark + pni->add_to_mark;
						if (j >= 0 && j < pni->c_limit)
							rgnie[i_arg] = ((NodeIndexEntry**)(pni + 1))[j];
					}
				}
			}
#endif
			ushort* pfix = rgpfix[(int)(pnwa_in->f & Edge.Flag.MultiIdMask)], pfix_end = *pfix++ + pfix;
			while (pfix < pfix_end)
			{
				ushort i_feat = *pfix++;
				bool f_prune = del_daughter_map != null && del_daughter_map[i_feat] == 1;
				bool f_expand = false;
				bool f_coref = false;
				NwayArgs** ppa_cur;
				NwayArgs nwa;
				//NwayArgs* pa = pa_next;
				NwayArgs* pa = &nwa;
				pa->f = 0;
				pa->c_remain = pa->c_args = 0;
				pa->fm_first = null;

				for (i_arg = 0; i_arg < c_args; i_arg++)
				{
					short tfsix;
					Tfs tfs;
					Edge.Flag nf;
					int nm;
#if NODE_INDEX
					NodeIndexEntry* pnie;
					if (f_any_ni && (pnie = rgnie[i_arg]) != null)
					{
					try_again:
						if (pnie->i_feat > i_feat)
							continue;
						if (pnie->i_feat < i_feat)
						{
							rgnie[i_arg] = ++pnie;
							goto try_again;
						}
						if ((nf = pnie->f) == 0)
							continue;
						nm = pnie->out_mark;
						tfsix = ((TfsixMark*)pnwa_in->tmx + i_arg)->tfsix;
						tfs = null;
					}
					else
#endif
					{
						ptm = (TfsixMark*)pnwa_in->tmx + i_arg;
						tfsix = ptm->tfsix;
						tfs = participants[tfsix];
						if ((nf = tfs.TryGetFlagsMark(i_feat, ptm->mark, out nm)) == 0)
							continue;
					}

					nf &= ~(Edge.Flag.Coreference | Edge.Flag.PrunedDuringParsing);
					if (pa->f == 0 || pa->f == nf)
					{
						pa->f = nf;
						f_expand = false;
					}
					else if (nf != 0)
					{
						Edge.Flag prev = pa->f;
						pa->f = tm.UnifyTypesNoCoref(prev, nf);
						if (pa->f == Edge.Flag.Bottom)
							return false;
						if (pa->f == nf)
							f_expand = false;
						else if (pa->f != prev)
							f_expand = true;
						/* note: if (f == prev), retain previous f_expand state */
					}

					if (nm == 0)
						continue;

					bool already_have = false;
					int txnm = (tfsix << 16) | (ushort)nm;
					if (nm < 0)
					{
						nm = ~nm;
						NwayArgs* pa_cur = *(ppa_cur = participant_bases[tfsix] + nm);
						if (pa_cur == null)
						{
							if (tfs == null)
								tfs = participants[tfsix];
							byte c_remain = (f_restrict ? tfs.coref_tally_map_restricted : tfs.coref_tally_map)[nm];
							Debug.Assert(c_remain > 0);	// orphan corefs (m<0) are not added to coref table. thus, it is
														// not an error for a negative mark to appear in the nwa
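							// (Invariant, inferred from this file: coref_tally_map[nm] holds the number of in-edges
							// that will visit coreferenced node 'nm'; 'c_remain' accumulates these tallies and is
							// decremented once per visit. A tally of 1 means only a single visitor exists within
							// this traversal, so the coreference bit is a false positive here and no join is needed.)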
							if (c_remain <= 1)
								goto coref_bit_false_positive;
							if (pa == &nwa)
							{
								pa = pa_next++;
								pa->f = nwa.f;
								for (j = 0; j < nwa.c_args; j++)
									pa->tmx[pa->c_args++] = nwa.tmx[j];
							}
							*ppa_cur = pa;
							pa->c_remain += c_remain;
						}
						else
						{
							if (pa == &nwa)
							{
								if (!tm.UnifyInTypeNoCoref(ref pa->f, pa_cur->f))
									return false;
								pa = pa_cur;
								pa->f = nwa.f;
								for (j = 0; j < nwa.c_args; j++)
									pa->tmx[pa->c_args++] = nwa.tmx[j];
							}
							else if (pa != pa_cur)
							{
								if (!tm.UnifyInTypeNoCoref(ref pa->f, pa_cur->f))
									return false;
								if ((k = pa_cur->c_args) > 0)
								{
									TfsixMark* dst = (TfsixMark*)(pa->tmx + pa->c_args);
									TfsixMark* src = (TfsixMark*)pa_cur->tmx;
									for (j = 0; j < k; j++)
									{
										if (src->mark < 0)
											participant_bases[src->tfsix][~src->mark] = pa;
										*dst++ = *src++;
									}
									pa->c_args += k;
								}
								// link FMs from 'pa_cur' to 'pa'
								if (pa->fm_first == null)
								{
									if ((pa->fm_first = pa_cur->fm_first) != null)
										pa->fm_last = pa_cur->fm_last;
								}
								else if (pa_cur->fm_first != null)
								{
									coref_links[pa->fm_last - p_ate_base] = pa_cur->fm_first;
									pa->fm_last = pa_cur->fm_last;
								}
								pa->c_remain += pa_cur->c_remain;
								pa_cur->c_remain = 0;
							}
							k = pa->c_args;
							for (j = 0; j < k; j++)
								if (pa->tmx[j] == txnm)
								{
									already_have = true;
									break;
								}
						}
						Debug.Assert(pa->c_remain > 0);
						pa->c_remain--;
						f_coref = true;
					}
				coref_bit_false_positive:
					if (!already_have)
						pa->tmx[pa->c_args++] = txnm;

					if (c_plug_remain > 0)
					{
						for (PlugPair* pp = plugs; pp < plugs_end; pp++)
						{
							if (*(int*)&pp->seek == txnm)
							{
								if (!tm.UnifyInTypeNoCoref(ref pa->f, pp->plug.e.FlagsId & ~Edge.Flag.Coreference))
									return false;
								*((TfsixMark*)pa->tmx + pa->c_args++) = new TfsixMark(pp->plug);
								*(int*)&pp->seek = -1;
								c_plug_remain--;
								break;
							}
						}
					}
				}

				bool f_nonbare = (pa->f & Edge.Flag.EtmNonBareType) != 0;
				if (f_expand && f_nonbare)
				{
					Tfs tfs_exp = tm.type_arr[(int)(pa->f & Edge.Flag.MultiIdMask)].Expanded;
					Debug.Assert(!(tfs_exp is BareTfs));
					((TfsixMark*)pa->tmx)[pa->c_args++] = new TfsixMark(AddTfs(tfs_exp), tfs_exp.Edge.Mark);
				}

				if (f_write)
				{
					if (f_prune)
					{
						if (f_nonbare && (pa->f != 0 || f_coref))
						{
							if (!_n_way(pa, 0, false))
								return false;
						}
						/// outbound now with success below, so we've noted the corefs below and we can now prune the node.
						if (m_dst < min_coref_in_mark)
							min_coref_in_mark = m_dst;
						p_ate->i_feat = i_feat;
						p_ate->mark = m_dst;
#if CHECK_TOPMOST_FEAT
						if (m_dst == 1)
							c_fix_topmost--;
#endif
						p_ate->e = Edge.PrunedEdge;
						p_ate++;
						goto recover;
					}

					/// only write the 'in-' half of a coreferenced node now (that is, during inbound traversal), and add
					/// it to the FM queue to have its 'out-' half written with the finished edge when the coreference is
					/// complete and outbound. This step cannot be postponed until outbound, because it's possible for
					/// the coref to be completed during the traversal below this point
					if (f_coref && m_dst != 0)
					{
						if (m_dst < min_coref_in_mark)
							min_coref_in_mark = m_dst;
						p_ate->i_feat = i_feat;
						p_ate->mark = m_dst;
#if CHECK_TOPMOST_FEAT
						if (m_dst == 1)
							c_fix_topmost--;
#endif
						if (pa->fm_first == null)
							pa->fm_last = p_ate;
						else
							coref_links[p_ate - p_ate_base] = pa->fm_first;
						pa->fm_first = p_ate;
						p_ate++;
					}
				}

				if (pa->c_args == 0 || !f_coref)
				{
					k = f_nonbare && f_write ? next_mark++ : 0;
					if (f_nonbare && !_n_way(pa, k, f_write))
						return false;
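					// (Mark conventions, inferred from this file: positive marks number ordinary substructure
					// nodes, negative marks, allocated downward from next_coref_mark, number coreferenced nodes,
					// and mark 0 denotes an edge written with no substructure of its own.)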
					/// outbound with success below. write a non-coreferenced edge
					if (f_write && m_dst != 0 && pa->f != 0)
					{
						if (m_dst < min_coref_in_mark)
							min_coref_in_mark = m_dst;
						p_ate->i_feat = i_feat;
						p_ate->mark = m_dst;
#if CHECK_TOPMOST_FEAT
						if (m_dst == 1)
							c_fix_topmost--;
#endif
						p_ate->e_FlagsId = pa->f;
						p_ate->e_Mark = k;
						p_ate++;
					}
				}
				else if (pa->c_remain == 0)
				{
					if (pa->f != 0 || pa->fm_first != pa->fm_last)
					{
#if MINIFUNCS
						if (!Write(pa, f_nonbare, f_write || pa->fm_first != null))
							return false;
#else
						/// re-enable writing if we pop out of a coreference which has a FeatMark stored,
						/// since that puts us back into a writing area. i.e. must still traverse down until we pop out
						pa->mx = 0;
						bool fwx = f_write || pa->fm_first != null;
						if (fwx)
						{
							if (pa->fm_first != pa->fm_last)
							{
								pa->f |= Edge.Flag.Coreference;
								pa->mx = next_coref_mark--;
							}
							else if (f_nonbare)
								pa->mx = next_mark++;
						}
						if (f_nonbare && !_n_way(pa, pa->mx, fwx))
							return false;
						if (pa->fm_first == null)
							continue;
						else if (pa->fm_first == pa->fm_last)
							pa->fm_first->e_ul = *(ulong*)pa;
						else
							pa->f_defer = true;
#endif
					}
				}
				else
					continue;

			recover:
				/// wrote. since writing is definitive, check for opportunistic recovery of variadic slots
				if (pa->c_remain != DeferredWrite && pa + 1 == pa_next)
				{
					pa_next--;
					pa_next->f = 0;
					pa_next->c_remain = pa_next->c_args = 0;
					pa_next->fm_first = null;
				}
			}
			return true;
		}

#if MINIFUNCS
		bool Write(NwayArgs* pa, bool f_nonbare, bool fwx)
		{
			/// re-enable writing if we pop out of a coreference which has a FeatMark stored,
			/// since that puts us back into a writing area. i.e. must still traverse down until we pop out
			pa->mx = 0;
			if (fwx)
			{
				if (pa->fm_first != pa->fm_last)
				{
					pa->f |= Edge.Flag.Coreference;
					pa->mx = next_coref_mark--;
				}
				else if (f_nonbare)
					pa->mx = next_mark++;
			}
			if (f_nonbare && !_n_way(pa, pa->mx, fwx))
				return false;
			if (pa->fm_first == null)
				return true;
			else if (pa->fm_first == pa->fm_last)
				pa->fm_first->e_ul = *(ulong*)pa;
			else
				pa->c_remain = DeferredWrite;
			return true;
		}
#endif
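		// Deferred coreference writes form a singly-linked list threaded through 'coref_links': the
		// successor of a pending arr_tfs_entry lives at coref_links[entry - p_ate_base], with
		// 'fm_first'/'fm_last' as the list head and tail. When a coreference completes with more than
		// one queued member, the write is deferred (c_remain = DeferredWrite) and the final
		// (flags, mark) pair is patched into every queued entry by _unify_sections below.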
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Registers 'tfs' as a unification participant: ensures that its coreference tally map (full or restricted)
		/// has been computed, assigns the next participant index, and reserves that participant's block of
		/// coreference-table slots.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		byte AddTfs(Tfs tfs)
		{
			//if (c_participants >= MaxParticipants)
			//	throw new InvalidOperationException("need more participant slots");
			TfsSection ts = tfs as TfsSection;
			byte[] map = f_restrict ? tfs.coref_tally_map_restricted : tfs.coref_tally_map;
			if (map == null)
			{
				if (ts != null)
				{
					if (f_restrict)
						ts._load_coref_tally_map_restricted();
					else
						ts._load_coref_tally_map();
					// pick up the freshly loaded map (assumption: the loaders populate these fields)
					map = f_restrict ? tfs.coref_tally_map_restricted : tfs.coref_tally_map;
				}
				else
				{
					int c_corefs = 0;
					int max_c = tfs.coref_tally_map.Length;
					if (f_restrict)
					{
						if (max_c > 0 && (tfs.Edge.FlagsId & Edge.Flag.EtmNonBareType) != 0)
						{
							map = new byte[max_c];
							EdgeCounterRestricted ec = new EdgeCounterRestricted(tfs, tfs.Edge, map);
							c_corefs = ec.ActualCount;
						}
						if (c_corefs == 0 || map == null)
							map = Tfs.NoCorefsTallyMap;
						tfs.coref_tally_map_restricted = map;
					}
					else
					{
						if (max_c > 0 && (tfs.Edge.FlagsId & Edge.Flag.EtmNonBareType) != 0)
						{
							map = new byte[max_c];
							EdgeCounter ec = new EdgeCounter(tfs, tfs.Edge, map);
							c_corefs = ec.ActualCount;
						}
						if (c_corefs == 0 || map == null)
							map = Tfs.NoCorefsTallyMap;
						tfs.coref_tally_map = map;
					}
				}
			}

			byte tfsix = c_participants++;
			if (ts != null)
				tfs = ts.mother;
			participants[tfsix] = tfs;
			participant_bases[tfsix] = ppmb_next;
			ppmb_next += map.Length;
			//if (ppmb_next - pp_map_base > CorefsMax)
			//	throw new InvalidOperationException("need more coref slots");
#if NODE_INDEX
			ArrayTfs atfs;
			if ((atfs = tfs as ArrayTfs) != null)
			{
				NodeIndex* pni;
				if ((pni = atfs.p_node_index) != null)
				{
					pp_ni_ptx[tfsix] = pni;
					f_any_ni = true;
				}
			}
#endif
			return tfsix;
		}
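		// Scratch layout for a single unification (all stack-allocated in _unify_sections below):
		//   _corefs / pa_next : up to CorefsMax pending coreference join records (NwayArgs)
		//   p_ate_base        : c_edge_alloc output slots (arr_tfs_entry), sized by the callers as
		//                       mother.EdgeCount plus each candidate's EdgeCount, plus 100 headroom
		//   coref_links       : parallel chain array linking deferred FeatMark writes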
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Drives a single unification: sets up the stack-resident scratch structures, runs the recursive traversal
		/// from the topmost node, verifies that every coreference tally was fully consumed, patches any deferred
		/// coreference writes, and assembles the resulting MotherDaughterTfs. Returns null on unification failure.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		MotherDaughterTfs _unify_sections(Edge.Flag f, TfsixMark txm_mother, PlugPair* candidates, int count)
		{
			NwayArgs nwa;
			nwa.f = f;
			nwa.tmx[0] = *(int*)&txm_mother;
			nwa.c_args = 1;

			NwayArgs* _p = stackalloc NwayArgs[CorefsMax];
			pa_next = _corefs = _p;
			arr_tfs_entry* _p_ate = stackalloc arr_tfs_entry[c_edge_alloc];
			p_ate = p_ate_base = _p_ate;
			arr_tfs_entry** _p_corefs_list = stackalloc arr_tfs_entry*[c_edge_alloc];
			coref_links = _p_corefs_list;
#if CHECK_TOPMOST_FEAT
			this.c_fix_topmost = *tm.rgpfix[(int)(f & Edge.Flag.MultiIdMask)];
#endif
			this.c_plug_remain = count;
			this.plugs = candidates;
			this.plugs_end = plugs + c_plug_remain;
			this.next_coref_mark = -1;
			this.next_mark = TopmostMark + 1;

			if (!_n_way(&nwa, TopmostMark, true))
				return null;

			int i_ate = (int)(this.p_ate - _p_ate);
			if (i_ate > c_edge_alloc)
				throw new InvalidOperationException("output edge buffer overrun");

			for (NwayArgs* pa = _corefs; pa < pa_next; pa++)
			{
				/// Self-blocking structures will result in non-zero tally remainders. The presence of such structure
				/// implies unification failure.
				if (pa->c_remain > 0)
					return null;
				else if (pa->c_remain == DeferredWrite)
					for (arr_tfs_entry* pfm = pa->fm_first; pfm != null; pfm = _p_corefs_list[pfm - p_ate_base])
						pfm->e_ul = *(ulong*)pa;
			}
#if CHECK_TOPMOST_FEAT
			//Console.Write("{0} ", c_fix_topmost);
			//Debug.Assert(c_fix_topmost == 0);
#endif
			return new MotherDaughterTfs(
						ctrl,
						new Edge(f, TopmostMark),
						_p_ate,
						i_ate,
						next_mark,
						next_coref_mark,
						min_coref_in_mark,
						f_restrict);
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Unifies one or more candidate TFSes into consecutive rule-daughter positions of 'mother', starting at
		/// 'i_first_arg'. Returns the unified TFS, or null if unification fails.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public static MotherDaughterTfs UnifySections(
			ParseControl ctrl,
			MotherDaughterTfs mother,
			int i_first_arg,
			Tfs[] candidates,
			bool f_args_delete,
			bool f_pack_restrict)
		{
#if false
			if ((f & Edge.Flag.EtmNonBareType) == 0)
				throw new NotImplementedException();
#endif
			fixed (byte* _del_daughter_map = ctrl.tm.config.parser.deleted_daughter_map)
			{
				UnificationNWay u = new UnificationNWay(ctrl);
				if (f_args_delete)
					u.del_daughter_map = _del_daughter_map;
				u.rgpfix = (u.f_restrict = f_pack_restrict) ? ctrl.tm.rgpfix_restr : ctrl.tm.rgpfix_allft;

				NwayArgs** _pp = stackalloc NwayArgs*[CorefsMax];
				u.ppmb_next = u.pp_map_base = _pp;
				NwayArgs*** _ppp = stackalloc NwayArgs**[MaxParticipants];
				u.participant_bases = _ppp;
#if NODE_INDEX
				NodeIndex** _pni = stackalloc NodeIndex*[MaxParticipants];
				u.pp_ni_ptx = _pni;
#endif
				TfsixMark txm_mother;
				txm_mother.tfsix = u.AddTfs(mother);
				txm_mother.mark = (short)mother.Edge.Mark;
				u.c_edge_alloc = mother.EdgeCount + 100;

				PlugPair* sections = stackalloc PlugPair[candidates.Length];
				for (int i = 0; i < candidates.Length; i++)
				{
					PlugPair* p = sections + i;
					p->seek.tfsix = txm_mother.tfsix;
					p->seek.mark = (short)mother.RuleDaughters[i_first_arg++].Edge.Mark;
					Tfs cnd = candidates[i];
					p->plug.tfsix = u.AddTfs(cnd);
					p->plug.e = cnd.Edge;
					u.c_edge_alloc += cnd.EdgeCount;
				}
				return u._unify_sections(mother.Edge.FlagsId, txm_mother, sections, candidates.Length);
			}
		}
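		// A hedged usage sketch: how a parser step might plug one passive edge into a rule daughter
		// position. 'daughter_section' and 'edge_tfs' are hypothetical names for objects supplied by
		// the surrounding parser, not members of this file.
		//
		//   MotherDaughterTfs result = UnificationNWay.UnifySection(
		//       ctrl,               // ParseControl carrying the TypeMgr and parser config
		//       daughter_section,   // TfsSection: the rule daughter to fill
		//       edge_tfs,           // Tfs: the candidate structure to plug in
		//       f_pruning: true,    // honor the configured deleted-daughter map
		//       f_pack_restrict: false);
		//   if (result == null)
		//       { /* unification failed: the candidate does not fit this daughter */ }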
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Unifies a single candidate TFS into one daughter section of that section's mother TFS. Returns the
		/// unified TFS, or null if unification fails.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public static MotherDaughterTfs UnifySection(
			ParseControl ctrl,
			TfsSection daughter,
			Tfs candidate,
			bool f_pruning,
			bool f_pack_restrict)
		{
			MotherDaughterTfs mother = (MotherDaughterTfs)daughter.mother;
			Edge.Flag f = mother.Edge.FlagsId;
#if false
			if ((f & Edge.Flag.EtmNonBareType) == 0)
				throw new NotImplementedException();
#endif
			fixed (byte* _del_daughter_map = ctrl.tm.config.parser.deleted_daughter_map)
			{
				UnificationNWay u = new UnificationNWay(ctrl);
				if (f_pruning)
					u.del_daughter_map = _del_daughter_map;
				u.rgpfix = (u.f_restrict = f_pack_restrict) ? ctrl.tm.rgpfix_restr : ctrl.tm.rgpfix_allft;

				NwayArgs** _pp = stackalloc NwayArgs*[CorefsMax];
				u.ppmb_next = u.pp_map_base = _pp;
				NwayArgs*** _ppp = stackalloc NwayArgs**[MaxParticipants];
				u.participant_bases = _ppp;
#if NODE_INDEX
				NodeIndex** _pni = stackalloc NodeIndex*[MaxParticipants];
				u.pp_ni_ptx = _pni;
#endif
				TfsixMark txm_mother;
				txm_mother.tfsix = u.AddTfs(mother);
				txm_mother.mark = (short)mother.Edge.Mark;
				u.c_edge_alloc = mother.EdgeCount + candidate.EdgeCount + 100;

				PlugPair pp;
				pp.seek.tfsix = txm_mother.tfsix;
				pp.seek.mark = (short)daughter.Edge.Mark;
				pp.plug.tfsix = u.AddTfs(candidate);
				pp.plug.e = candidate.Edge;
				return u._unify_sections(f, txm_mother, &pp, 1);
			}
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Per-node accumulator for the traversal: the unified type flags, the pending output mark, the head and tail
		/// of the deferred FeatMark list, the outstanding coreference tally, and the packed (tfsix, mark) argument slots.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public partial struct NwayArgs
		{
			public Edge.Flag f;
			public int mx;
			public arr_tfs_entry* fm_first;
			public arr_tfs_entry* fm_last;
			public int c_remain;
			public int c_args;
			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			public fixed int tmx[ArgArityMax];
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// A participant index paired with a node mark. On the little-endian layouts this code targets, a TfsixMark
		/// reinterpreted as an int equals the packed value (tfsix &lt;&lt; 16) | (ushort)mark used in NwayArgs.tmx.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public struct TfsixMark
		{
			public TfsixMark(short tfsix, int mark)
			{
				this.tfsix = tfsix;
				this.mark = (short)mark;
			}
			public TfsixMark(TfsixEdge te)
			{
				this.tfsix = te.tfsix;
				this.mark = (short)te.e.Mark;
			}
			public short mark;
			public short tfsix;
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Pairs the (tfsix, mark) location to seek within the mother structure with the edge to plug in when that
		/// location is reached.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public struct PlugPair
		{
			public PlugPair(TfsixMark seek, TfsixEdge plug)
			{
				this.seek = seek;
				this.plug = plug;
			}
			public TfsixMark seek;
			public TfsixEdge plug;
		};
	};
}