using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; using System.Runtime.InteropServices; using miew.BitArray; using miew.Enumerable; #if ILFUNC using IlFunc; #endif #pragma warning disable 0649 namespace agree { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public unsafe partial class TypeMgr { public const uint GLB_CACHE_ENTRIES = 300007;//570001; // a prime number public GlbCacheEntry[][] glb_cache = new GlbCacheEntry[GLB_CACHE_ENTRIES][]; public Dictionary<BitArr, Type> code_dict; public struct GlbCacheEntry { public GlbCacheEntry(uint k, Edge.Flag f) { this.k = k; this.f = f; } public uint k; public Edge.Flag f; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void UpdateGlbCacheBareTypes() { foreach (GlbCacheEntry[] rge in glb_cache) if (rge != null) for (int i = 0; i < rge.Length; i++) if (!type_arr[(int)(rge[i].f & Edge.Flag.MultiIdMask)].IsBare) rge[i].f |= Edge.Flag.EtmNonBareType; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void AddGlb(int id1, int id2, Edge.Flag f) { Debug.Assert(c_t_bits != -1); Debug.Assert(id1 != 0 && id2 != 0 && id1 != id2); uint k, hash = (k = (uint)(id1 < id2 ? (id2 << 16) | id1 : (id1 << 16) | id2)) % GLB_CACHE_ENTRIES; GlbCacheEntry gce = new GlbCacheEntry(k, f); GlbCacheEntry[] rge = glb_cache[hash]; if (rge == null) glb_cache[hash] = new GlbCacheEntry[] { gce }; else { /// ordered insert int i = 0, j = 0, c_prev = rge.Length; GlbCacheEntry[] rgenew = new GlbCacheEntry[c_prev + 1]; while (i < c_prev) { if (k < rge[i].k) { rgenew[j++] = gce; k = int.MaxValue; } rgenew[j++] = rge[i++]; } if (i == j) rgenew[j] = gce; /// atomic publishing; don't care about lost entries due to race glb_cache[hash] = rgenew; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public bool ComputeAndCacheGlb(uint k, int id1, int id2, out Edge.Flag ef_glb) { Type t_glb; bool b_ret; /// note: already measured using a direct code_arr and it was slower; presumably it's more important to /// keep the type_arr cache warm. if (!(b_ret = code_dict.TryGetValue(type_arr[id1].m_code.AndWithHash(type_arr[id2].m_code), out t_glb))) { ef_glb = Edge.Flag.Bottom; // todo: more cache entries can be implied, see "bag of useful techniques" paper } else { if (t_glb == StringType) ef_glb = Edge.Flag.EtmString; else { ef_glb = (Edge.Flag)t_glb.m_id; if ((t_glb.m_flags & Type.Flags.HasAnyFeatures) != 0) ef_glb |= Edge.Flag.EtmNonBareType; } } GlbCacheEntry gce = new GlbCacheEntry(k, ef_glb); uint hash; GlbCacheEntry[] rge = glb_cache[hash = k % GLB_CACHE_ENTRIES]; if (rge == null) glb_cache[hash] = new GlbCacheEntry[] { gce }; else { /// ordered insert GlbCacheEntry[] rgenew = new GlbCacheEntry[rge.Length + 1]; int i = 0, j = 0; while (i < rge.Length) { if (k < rge[i].k) { rgenew[j++] = gce; k = int.MaxValue; } rgenew[j++] = rge[i++]; } if (i == j) rgenew[j] = gce; /// atomic publishing; don't care about lost entries due to race glb_cache[hash] = rgenew; } return b_ret; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Edge.Flag ComputeAndCacheGlb(uint k, int id1, int id2) { Type t_glb; Edge.Flag ef_glb; /// note: already measured using a direct code_arr and it was slower; presumably it's more important to /// keep the type_arr cache warm. if (!code_dict.TryGetValue(type_arr[id1].m_code.AndWithHash(type_arr[id2].m_code), out t_glb)) { ef_glb = Edge.Flag.Bottom; // todo: more cache entries can be implied, see "bag of useful techniques" paper } else { if (t_glb == StringType) ef_glb = Edge.Flag.EtmString; else { ef_glb = (Edge.Flag)t_glb.m_id; if ((t_glb.m_flags & Type.Flags.HasAnyFeatures) != 0) ef_glb |= Edge.Flag.EtmNonBareType; } } uint hash = k % GLB_CACHE_ENTRIES; GlbCacheEntry gce = new GlbCacheEntry(k, ef_glb); /// ordered insert GlbCacheEntry[] rgenew, rge = glb_cache[hash]; if (rge == null) rgenew = new GlbCacheEntry[] { gce }; else { rgenew = new GlbCacheEntry[rge.Length + 1]; int i = 0, j = 0; while (i < rge.Length) { if (k < rge[i].k) { rgenew[j++] = gce; k = int.MaxValue; } rgenew[j++] = rge[i++]; } if (i == j) rgenew[j] = gce; } /// atomic publishing; don't care about lost entries due to race glb_cache[hash] = rgenew; return ef_glb; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public int GetGlbCheckTopAndIdentity(int id1, int id2) { if (id1 == 0 || id1 == id2) return id2; if (id2 == 0) return id1; Edge.Flag ef_glb; uint k = (uint)(id1 < id2 ? (id2 << 16) | id1 : (id1 << 16) | id2); GlbCacheEntry[] rge = glb_cache[k % GLB_CACHE_ENTRIES]; if (rge != null) { uint gk, i = 0; do { GlbCacheEntry gce = rge[i]; if ((gk = gce.k) == k) { ef_glb = gce.f; goto found; } } while (gk < k && ++i < rge.Length); } if (!ComputeAndCacheGlb(k, id1, id2, out ef_glb)) return -1; found: if ((ef_glb & Edge.Flag.EtmString) != 0) return StringId; return (int)(ef_glb & Edge.Flag.MultiIdMask); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public bool CanUnify(int id1, int id2) { Debug.Assert(id1 != 0 && id2 != 0 && id1 != id2); uint k = (uint)(id1 < id2 ? (id2 << 16) | id1 : (id1 << 16) | id2); GlbCacheEntry[] rge = glb_cache[k % GLB_CACHE_ENTRIES]; if (rge != null) { uint gk, i = 0; do { GlbCacheEntry gce = rge[i]; if ((gk = gce.k) == k) return gce.f >= 0; } while (gk < k && ++i < rge.Length); } return ComputeAndCacheGlb(k, id1, id2) >= 0; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public bool CanUnify(Edge.Flag f1, Edge.Flag f2) { int id1, id2; if ((f1 & Edge.Flag.EtmString) != 0) { if ((f2 & Edge.Flag.EtmString) != 0) return ((f1 ^ f2) & Edge.Flag.MultiIdMask) == 0; id1 = StringId; id2 = (int)(f2 & Edge.Flag.MultiIdMask); } else if ((f2 & Edge.Flag.EtmString) != 0) { id1 = (int)(f1 & Edge.Flag.MultiIdMask); id2 = StringId; } else if ((id1 = (int)(f1 & Edge.Flag.MultiIdMask)) == 0 || (id2 = (int)(f2 & Edge.Flag.MultiIdMask)) == 0 || id1 == id2) return true; uint k = (uint)(id1 < id2 ? (id2 << 16) | id1 : (id1 << 16) | id2); GlbCacheEntry[] rge = glb_cache[k % GLB_CACHE_ENTRIES]; if (rge != null) { uint gk, i = 0; do { if ((gk = rge[i].k) == k) return rge[i].f >= 0; } while (gk < k && ++i < rge.Length); } Edge.Flag ef_glb; return ComputeAndCacheGlb(k, id1, id2, out ef_glb); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Edge.Flag UnifyTypes(Edge.Flag f0, Edge.Flag f1) { Debug.Assert(f0 != f1 && f0 != 0 && f1 != 0); int t0 = (int)(f0 & Edge.Flag.MultiIdMask); int t1 = (int)(f1 & Edge.Flag.MultiIdMask); Edge.Flag nfex; if (((nfex = ((f0 | f1) & (Edge.Flag.Coreference | Edge.Flag.EtmString))) & Edge.Flag.EtmString) != 0) { if ((f0 & Edge.Flag.EtmString) == 0) return t0 == 0 || CanUnify(StringId, t0) ? f1 | nfex : Edge.Flag.Bottom; if ((f1 & Edge.Flag.EtmString) == 0) return t1 == 0 || CanUnify(StringId, t1) ? f0 | nfex : Edge.Flag.Bottom; return (t0 == 0 || t0 == t1) ? (f1 | nfex) : (t1 == 0) ? (f0 | nfex) : Edge.Flag.Bottom; } if (t0 == 0 || t0 == t1) return f1 | nfex; if (t1 == 0) return f0 | nfex; uint k = (uint)(t0 < t1 ? (t1 << 16) | t0 : (t0 << 16) | t1); TypeMgr.GlbCacheEntry[] rge = glb_cache[k % TypeMgr.GLB_CACHE_ENTRIES]; if (rge != null) { TypeMgr.GlbCacheEntry gce; uint gk, i = 0; do if ((gk = (gce = rge[i]).k) == k) return gce.f | nfex; while (gk < k && ++i < rge.Length); } return ComputeAndCacheGlb(k, t0, t1) | nfex; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Edge.Flag UnifyTypesNoCoref(Edge.Flag f0, Edge.Flag f1) { int t0 = (int)(f0 & Edge.Flag.MultiIdMask); int t1 = (int)(f1 & Edge.Flag.MultiIdMask); if (((f0 | f1) & Edge.Flag.EtmString) != 0) { if ((f0 & Edge.Flag.EtmString) == 0) { if (t0 != 0 && !CanUnify(StringId, t0)) return Edge.Flag.Bottom; return f1; } else if ((f1 & Edge.Flag.EtmString) == 0) { if (t1 != 0 && !CanUnify(StringId, t1)) return Edge.Flag.Bottom; return f0; } else if (t0 == 0 || t0 == t1) return f1; else if (t1 != 0) return Edge.Flag.Bottom; return f0; } Debug.Assert(t0 != 0 && t1 != 0 && t0 != t1); uint k = (uint)(t0 < t1 ? (t1 << 16) | t0 : (t0 << 16) | t1); TypeMgr.GlbCacheEntry[] rge = glb_cache[k % TypeMgr.GLB_CACHE_ENTRIES]; if (rge != null) { TypeMgr.GlbCacheEntry gce; uint gk, i = 0; do if ((gk = (gce = rge[i]).k) == k) { return f0 = gce.f; } while (gk < k && ++i < rge.Length); } return f0 = ComputeAndCacheGlb(k, t0, t1); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public bool UnifyInTypeNoCoref(ref Edge.Flag f, Edge.Flag nf) { Debug.Assert(nf >= 0); if (nf == 0 || f == nf) return true; if (f == 0) { f = nf; return true; } Edge.Flag nfex = (f | nf) & Edge.Flag.EtmString; int t0 = (int)(f & Edge.Flag.MultiIdMask); int t1 = (int)(nf & Edge.Flag.MultiIdMask); if (nfex != 0) { if ((f & Edge.Flag.EtmString) == 0) { if (t0 != 0 && !CanUnify(StringId, t0)) return false; f = nf; } else if ((nf & Edge.Flag.EtmString) == 0) { if (t1 != 0 && !CanUnify(StringId, t1)) return false; } else if (t0 == 0 || t0 == t1) f = nf; else if (t1 != 0) return false; return true; } uint k = (uint)(t0 < t1 ? (t1 << 16) | t0 : (t0 << 16) | t1); TypeMgr.GlbCacheEntry[] rge = glb_cache[k % TypeMgr.GLB_CACHE_ENTRIES]; if (rge != null) { TypeMgr.GlbCacheEntry gce; uint gk, i = 0; do if ((gk = (gce = rge[i]).k) == k) { f = gce.f; return f != Edge.Flag.Bottom; } while (gk < k && ++i < rge.Length); } f = ComputeAndCacheGlb(k, t0, t1); return f != Edge.Flag.Bottom; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Type GlbOfMany(IEnumerable<int> ie_fix) { return GetGlbOfMany(ie_fix.Select(fix => feat_arr[fix].maximal_type)); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Consider other options for highly spun code /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public Type GetGlbOfMany(IEnumerable<Type> _ie) { using (IEnumerator<Type> ie = _ie.GetEnumerator()) { if (!ie.MoveNext()) return null; int tid = ie.Current.m_id; while (ie.MoveNext()) if ((tid = GetGlbCheckTopAndIdentity(tid, ie.Current.m_id)) == -1) return null; return type_arr[tid]; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public String GlbCacheInfo() { int c = 0; int min = int.MaxValue; int max = int.MinValue; foreach (TypeMgr.GlbCacheEntry[] rge in g.tm.glb_cache) if (rge != null) { int l = rge.Length; c += l; if (l < min) min = l; if (l > max) max = l; } return String.Format("glb-cache: slots: {0} tot: {1} min {2} max {3} avg load {4:0.000}", GLB_CACHE_ENTRIES, c, min, max, (double)c / GLB_CACHE_ENTRIES); } public Type UnifyTypes(String t0, String t1) { Edge.Flag f = UnifyTypes((Edge.Flag)type_dict[t0].m_id, (Edge.Flag)type_dict[t1].m_id); if (f == Edge.Flag.Bottom) return null; if ((f & Edge.Flag.EtmString) != 0) return StringType; return type_arr[(int)(f & Edge.Flag.MultiIdMask)]; } public ushort*[] rgpfix_allft; public ushort*[] rgpfix_restr; public ushort*[] rgpfix_dargs; public ushort*[] rgpfix_da_rs; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public unsafe void UnsafeInit(int[][] rgrgfix) { HashSet<int> restr = config.parser.packing_restrictors.Select(s => feat_map[s].i_feat).ToHashSet(); HashSet<int> dargs = config.parser.deleted_daughters.Select(s => feat_map[s].i_feat).ToHashSet(); int c_restr, c_dargs, c_da_rs, c_allft; c_restr = c_dargs = c_da_rs = c_allft = rgrgfix.Length; foreach (int[] rg in rgrgfix) { c_allft += rg.Length; foreach (int fix in rg) { bool br = !restr.Contains(fix); bool bd = !dargs.Contains(fix); if (br) c_restr++; if (bd) c_dargs++; if (br && bd) c_da_rs++; } } ushort* pi_allft = (ushort*)Marshal.AllocHGlobal((c_allft + c_restr + c_dargs + c_da_rs) * sizeof(ushort)); ushort* pi_restr = pi_allft + c_allft; ushort* pi_dargs = pi_restr + c_restr; ushort* pi_da_rs = pi_dargs + c_dargs; rgpfix_allft = new ushort*[rgrgfix.Length]; rgpfix_restr = new ushort*[rgrgfix.Length]; rgpfix_dargs = new ushort*[rgrgfix.Length]; rgpfix_da_rs = new ushort*[rgrgfix.Length]; int i = 0; foreach (int[] rg in rgrgfix) { *(rgpfix_allft[i] = pi_allft++) = (ushort)rg.Length; ushort* p0 = (rgpfix_restr[i] = pi_restr++); ushort* p1 = (rgpfix_dargs[i] = pi_dargs++); ushort* p2 = (rgpfix_da_rs[i] = pi_da_rs++); foreach (int fix in rg.OrderBy(x => x)) { *pi_allft++ = (ushort)fix; bool br = !restr.Contains(fix); bool bd = !dargs.Contains(fix); if (br) *pi_restr++ = (ushort)fix; if (bd) *pi_dargs++ = (ushort)fix; if (br && bd) *pi_da_rs++ = (ushort)fix; } *p0 = (ushort)(pi_restr - p0 - 1); *p1 = (ushort)(pi_dargs - p1 - 1); *p2 = (ushort)(pi_da_rs - p2 - 1); i++; } } }; }