using System; using System.Text; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using miew.Enumerable; namespace agree { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// A value type for transforming TDL tokens (in the form of BaseFeatureConstraints) into persisted TFSs. /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// [DebuggerDisplay("{ToString(),nq}")] public sealed class TdlNavigator : IEnumerator<TdlTok>, IDisposable { public TdlNavigator(Tfs tfs, Instance inst) { this.tfs = tfs; this.inst = inst; this.pp = default(TdlParsePos); this.corefs = null; this.edge_out = default(Edge); } readonly Instance inst; TypeMgr tm { get { return inst.tm; } } Edge edge_out; Tfs tfs; [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)] Dictionary<String, Coreference> corefs; TdlParsePos pp; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Private helper class for use by TdlNavigator /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// [DebuggerDisplay("{ToString(),nq}")] class Coreference : HashSet<ConstraintRef> { public Coreference(TdlTok tok) { this.tok = tok; } public TdlTok tok; public override String ToString() { return String.Format("#{0} {1}", tok.i_s, this.Select(x => new FeatMark(x.i_feat, x.Host.Mark)).StringJoin(" ")); } }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void AddCoref(String tag, TdlTok tok, ConstraintRef cref) { ((MarkingTfs)tfs).ChangeEdgeCoreference(cref.FeatMark, true); Coreference ent; if (corefs == null) corefs = new Dictionary<String, Coreference> { { tag, ent = new Coreference(tok) } }; else if (!corefs.TryGetValue(tag, out ent)) corefs.Add(tag, ent = new Coreference(tok)); ent.Add(cref); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// update coreference table /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool UpdateCoref(int in_mark, Edge e_target) { /// we don't want to include the host type in our search for matches in the coreference table /// because unification is permitted foreach (Coreference cr in corefs.Values) { foreach (ConstraintRef cref in cr) { /// we don't want to include the host type in our search for matches in the coreference table /// because unification is permitted if (cref.Host.Mark == in_mark) { /// found a match in the table. check that the host types unify. if (!tm.CanUnify(e_target.FlagsId, cref.Host.FlagsId)) return false; /// patch the coreference table to reflect the upcoming change cr.Remove(cref); cr.Add(new ConstraintRef(tfs, e_target, cref.i_feat)); break; // can we double-break here? } } } return true; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// fixup coreferences /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void IDisposable.Dispose() { if (corefs != null) { var incomplete_coref = corefs.FirstOrDefault(tt => tt.Value.Count < 2); if (!incomplete_coref.Equals(default(KeyValuePair<String, TdlNavigator.Coreference>))) throw new TdlException(incomplete_coref.Value.tok, "Coreference tag #{0} only used once", incomplete_coref.Key); List<String> tags_done = new List<String>(); foreach (TdlNavigator.Coreference coref in corefs.Values) { /// enumerate the cached structures. var ie = coref.GetEnumerator(); ie.MoveNext(); /// destructively unify the remaining edges into the starting edge ConstraintRef cr_cur = ie.Current; while (ie.MoveNext()) { /// The edge under cr_cur changeshere, so make sure to retrieve and set the (final) new edge in a /// separate pass, below if (!DestructiveUnifyNoCoreference(cr_cur, ie.Current.Constraint)) throw new Exception(); } Edge e_cur = cr_cur.Constraint; if (!e_cur.IsCoreferenced) e_cur = ((MarkingTfs)tfs).CreateRecycledEdge(tm.GetEdgeType(e_cur.FlagsId), e_cur.Mark, true); // set the new edge into all the sources foreach (ConstraintRef cr in coref) cr.SetConstraint(e_cur); } corefs = null; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// This function is only used when fixing-up coreferences that appear within a single definition. Therefore, /// e1 and e2 are known to be inside the TFS and either can be returned. The exception is the case of the glb /// being neither type, when we allocate a new edge /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool DestructiveUnifyNoCoreference(ConstraintRef cr1, Edge e2) { Edge e1 = cr1.Constraint; Edge e_tmp; if (((e1.FlagsId | e2.FlagsId) & Edge.Flag.EtmString) != 0) { if (!tm.strings.Unify(e1.Mark != 0 ? e1.Mark : e2.Mark, e1, e2, (MarkingTfs)tfs, out e_tmp)) return false; cr1.SetConstraint(e_tmp); return true; } int id1 = (int)(e1.FlagsId & Edge.Flag.MultiIdMask); int id2 = (int)(e2.FlagsId & Edge.Flag.MultiIdMask); if (id1 == 0) { if (id2 != 0) cr1.SetConstraint(e2); return true; } if (id2 == 0) return true; int glb_id = tm.GetGlbCheckTopAndIdentity(id1, id2); if (glb_id == -1) return false; if (glb_id == id1) return DestructiveUnifyInFeatures(e1, e2); if (glb_id == id2) { cr1.SetConstraint(e2); return DestructiveUnifyInFeatures(e2, e1); } e_tmp = ((MarkingTfs)tfs).CreateEdge(tm.type_arr[glb_id], true); if (!DestructiveUnifyInFeatures(e_tmp, e2) || !DestructiveUnifyInFeatures(e_tmp, e1)) return false; cr1.SetConstraint(e_tmp); return true; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool DestructiveUnifyInFeatures(Edge e1, Edge e2) { foreach (int fix in tm.rgrgfix_by_type[(int)(e2.FlagsId & Edge.Flag.MultiIdMask)]) { Edge ne2; if (!tfs.TryGetEdge(fix, e2.Mark, out ne2)) continue; Edge ne1; if (!tfs.TryGetEdge(fix, e1.Mark, out ne1)) { /// the destructive target has no edge specified so we will hijack the other edge into the TFS /// by just changing its in-mark. First add a duplicate under the new mark, then remove it below tfs.SetEdge(fix, e1.Mark, ne2); } else if (ne1.Mark != ne2.Mark || ne1.FlagsId != ne2.FlagsId) { if (!DestructiveUnifyNoCoreference(new ConstraintRef(tfs, e1, fix), ne2)) return false; } /// If the other edge is coreferenced, then we know that something in the coreference table is /// pointing at it, so prior to removing it, we must find those entries and update them. if (ne2.IsCoreferenced && !UpdateCoref(e2.Mark, e1)) return false; /// remove the garbage edge tfs.RemoveEdge(fix, e2.Mark); } return true; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> /// Entry point for this structure /// </summary> /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public void AcceptBaseConstraint(ConstraintRef cref, BaseFeatConstraint bfc) { edge_out = cref.Host; pp = new TdlParsePos(bfc); if (pp.MoveNext()) AcceptFeatures(cref, TdlTok.TokMap_Empty); if (!pp.Eof) throw new TdlException(pp.Current, "TDL parser returned without using all tokens."); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Edge AcceptFeatures(ConstraintRef cr_base, byte[] tok_map) { ConstraintRef cr = cr_base; while (!pp.Eof) { // _ _ // | current | // | FEAT type & ... | // |_ ^ _| VerifyTokenType(TdlTok.Type.Identifier, "Expected a feature name (1)"); int i_feat = tm.GetFeatureIndex(pp.CurString); if (!cr.HostType.HasFeature(i_feat)) { Type tt_maximal = tm.GetMaximalTypeForFeature(i_feat); int glb_id; if ((glb_id = tm.GetGlbCheckTopAndIdentity(cr.HostType.m_id, tt_maximal.m_id)) == -1) throw new TdlException(Current, "Error inferring type. '{0}' is not appropriate for '{1}'. (Maximal type '{1}' for feature '{0}' does not unify with previously seen type '{2}'.)", pp.CurString.ToUpper(), cr.HostType.Name, tt_maximal.Name); cr.SetHostType(tm.type_arr[glb_id]); cr_base = cr; } cr.SwitchToFeature(i_feat); TdlTok.Type tok = MoveNextThrow().t; // Follow a path specification if (tok == TdlTok.Type.Dot) { MoveNextThrow(); VerifyTokenType(TdlTok.Type.Identifier, "Error following feature path specification. Expected a feature name after '.'"); // _ _ // | current | // | F1 . F2 type & ... | // |_ ^ _| // // If the next feature in the path is not appropriate for the current type, now is the time to infer // a more specific type by peeking ahead. To that feature, assign a node with a type corresponding to // the maximal type for the next feature in the path. i_feat = tm.GetFeatureIndex(pp.CurString); if (!cr.ConstraintType.HasFeature(i_feat)) { Type tt_maximal = tm.GetMaximalTypeForFeature(i_feat); if (!cr.UnifyInConstraintType(tt_maximal)) throw new TdlException(Current, "Error inferring type. Existing type {0} for feature {1} failed to unify with maximal type {2} ", cr.ConstraintType.Name, pp.CurString.ToUpper(), tt_maximal.Name); } cr = cr.NextConstraint(i_feat); } else if (tok == TdlTok.Type.Identifier || tok == TdlTok.Type.Tag || tok == TdlTok.Type.SquareOpen || tok == TdlTok.Type.AngleOpen || tok == TdlTok.Type.DifferenceListOpen || tok == TdlTok.Type.String) { // _ _ // | current | // | FEAT type & ... | // |_ ^ _| AcceptType(cr, tok_map); if (pp.Eof || tok_map[(int)pp.CurType] == 1) return cr_base.Host; // restore the parse level after (possibly) accepting a dotted feature path cr = cr_base; } else throw new TdlException(Current, "Expected: '.', type identifier, tag, '[', '<', or '<!'."); } throw new TdlException(Current, "Unexpected end of token stream while parsing TDL."); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Edge AcceptList(ConstraintRef cr, int c_parts) { if (!cr.UnifyInConstraintType(tm.tt_list_head)) throw new TdlException(Current, "Error expanding list. Existing type {0} for feature {1} failed to unify with list type {2} ", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_list_head.Name); ConstraintRef cref_fwd = cr; if (c_parts-- > 0) { cref_fwd = cr.NextConstraint(tm.f_ix_list_head); AcceptType(cref_fwd, TdlTok.TokMap_AngCl_Comma_Dot); // select feature 'REST' cref_fwd.SwitchToFeature(tm.f_ix_list_tail); Type term_type = tm.tt_list; if (Current.t == TdlTok.Type.Dot) { Debug.Assert(c_parts == 1, "should have prevented dotted pair with >2 parts during tokenization"); // _ _ // | current | // | FEAT < a . b > | // |_ ^ _| MoveNextThrow(); AcceptType(cref_fwd, TdlTok.TokMap_AngCl); c_parts--; } else if (Current.t == TdlTok.Type.Comma) { MoveNextThrow(); if (Current.t == TdlTok.Type.Ellipsis) { Debug.Assert(c_parts == 0); MoveNextThrow(); } } else if (Current.t == TdlTok.Type.AngleClose) { Debug.Assert(c_parts == 0); term_type = tm.tt_empty; } if ( #if PET_ENFORCES_LIST_TERM_TYPE==true term_type==tm.tt_empty && #endif !cref_fwd.UnifyInConstraintType(term_type)) throw new TdlException(Current, "Error expanding list. Existing type {0} for feature {1} failed to unify with list type {2} ", cref_fwd.ConstraintType.Name, cref_fwd.Feature.ToUpper(), term_type.Name); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// recurse down list parts /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if (c_parts > 0) cr.SetConstraint(AcceptList(cref_fwd, c_parts)); } return cr.Host; // caller must propagate it upwards } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Edge AcceptDifferenceList(ConstraintRef cr, int c_parts, String last_coref) { if (!cr.UnifyInConstraintType(tm.tt_list_head)) throw new TdlException(Current, "Error expanding difference list. Existing type {0} for feature {1} failed to unify with list type {2} ", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_list_head.Name); ConstraintRef cref_fwd = cr; if (c_parts-- > 0) { cref_fwd = cr.NextConstraint(tm.f_ix_list_head); AcceptType(cref_fwd, TdlTok.TokMap_DlsCl_Comma); // select feature 'REST' cref_fwd.SwitchToFeature(tm.f_ix_list_tail); if (!cref_fwd.UnifyInConstraintType(tm.tt_list)) throw new TdlException(Current, "Error expanding difference list. Existing type {0} for feature {1} failed to unify with list type {2} ", cref_fwd.ConstraintType.Name, cref_fwd.Feature.ToUpper(), tm.tt_list.Name); if (Current.t == TdlTok.Type.Comma) MoveNextThrow(); if (Current.t == TdlTok.Type.DifferenceListClose) Debug.Assert(c_parts == 0); if (c_parts > 0) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// recurse down list parts /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cr.SetConstraint(AcceptDifferenceList(cref_fwd, c_parts, last_coref)); return cr.Host; } } AddCoref(last_coref, Current, cref_fwd); return cr.Host; // caller must propagate it upwards } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void AcceptType(ConstraintRef cr, byte[] tok_map) { // Unify together (multiple) type- and feature-structure constraints on this feature separated by TOK_AMP // _ _ // | current | // | FEAT type & #tag & [constraint] & ... | // |_ ^ _| // while (!pp.Eof) { Type tt; TdlTok.Type ttok = pp.CurType; if (ttok == TdlTok.Type.Identifier) { if (!cr.UnifyInConstraintType(tt = tm.LookupType(Current))) throw new TdlException(Current, String.Format("existing type {0} on feature {1} failed to unify with {2}", cr.ConstraintType.Name, cr.Feature.ToUpper(), tt.Name)); } else if (ttok == TdlTok.Type.String) { Edge existing_edge = cr.Constraint; String es = tm.GetStringValue(existing_edge.FlagsId); if (es != null && es != Current.i_s) { throw new TdlException(Current, String.Format("string value '{0}' for feature {1} failed to unify with string '{2}'", es, cr.Feature.ToUpper(), Current.i_s)); } else { /* must unify down to string */ int glb_id; if (!tm.IsTopId(existing_edge) && (glb_id = tm.GetGlbCheckTopAndIdentity(tm.GetEdgeType(existing_edge.FlagsId).m_id, tm.StringType.m_id)) == -1) throw new TdlException(Current, String.Format("existing type {0} on feature {1} failed to unify with string type '{2}'", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.StringType.Name)); cr.SetConstraint(((MarkingTfs)tfs).CreateStringEdge(Current.i_s)); } } else if (ttok == TdlTok.Type.Tag) { AddCoref(pp.CurString, Current, cr); } else if (ttok == TdlTok.Type.SquareOpen) { MoveNextThrow(); if (pp.CurType == TdlTok.Type.SquareClose) { // < [ ] , ... > // ^ // (do nothing) } else { VerifyTokenType(TdlTok.Type.Identifier, "Expected a feature name (2)"); // infer type by peeking ahead in the given path int i_feat_next = tm.GetFeatureIndex(pp.CurString); if (i_feat_next == -1) throw new TdlException(Current, "Error inferring type. Feature {0} was not defined for any type", pp.CurString.ToUpper()); if (!cr.ConstraintType.HasFeature(i_feat_next)) { if (!cr.UnifyInConstraintType(tt = tm.GetMaximalTypeForFeature(i_feat_next))) throw new TdlException(Current, "Error inferring type. Existing type {0} for feature {1} failed to unify with maximal type {2}", cr.ConstraintType.Name, pp.CurString.ToUpper(), tt.Name); } // recurse to build a TFS for the contents of the square brackets cr.SetConstraint(AcceptFeatures(cr.NextConstraint(i_feat_next), TdlTok.TokMap_SqCl)); } } else if (ttok == TdlTok.Type.AngleOpen) { // _ _ // | current | // | FEAT < > | // | ^ | // | FEAT < ... > | // | ^ | // | FEAT < type ... > | // | ^ | // | FEAT < #tag ... > | // | ^ | // | FEAT < [ constraint ... > | // |_ ^ _| int c_parts = Current.c_parts; MoveNextThrow(); if (c_parts > 0) AcceptList(cr, c_parts); else { if (!cr.UnifyInConstraintType(tm.tt_empty)) throw new TdlException(Current, "Error expanding list. Existing type {0} for feature {1} failed to unify with empty list type {2}", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_empty.Name); } VerifyTokenType(TdlTok.Type.AngleClose); } else if (ttok == TdlTok.Type.DifferenceListOpen) { int c_parts = Current.c_parts; MoveNextThrow(); String last_coref = "diff-list-" + Guid.NewGuid().ToString(); if (!cr.UnifyInConstraintType(tm.tt_dlist_list)) throw new TdlException(Current, "Error expanding difference list. Existing type {0} for feature {1} failed to unify with difference list type {2} ", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_dlist_list.Name); ConstraintRef cref_dl = cr.NextConstraint(tm.f_ix_dlist_list); if (c_parts > 0) { AcceptDifferenceList(cref_dl, c_parts, last_coref); } else { if (!cref_dl.UnifyInConstraintType(tm.tt_list)) throw new TdlException(Current, "Error expanding difference list. Existing type {0} for feature {1} failed to unify with list type {2} ", cref_dl.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_list.Name); AddCoref(last_coref, Current, cref_dl); } // unify 'LAST list' cref_dl.SwitchToFeature(tm.f_ix_dlist_last); if (!cref_dl.UnifyInConstraintType(tm.tt_list)) throw new TdlException(Current, "Error expanding difference list. Existing type {0} for feature {1} failed to unify with list type {2} ", cr.ConstraintType.Name, cr.Feature.ToUpper(), tm.tt_list.Name); AddCoref(last_coref, Current, cref_dl); VerifyTokenType(TdlTok.Type.DifferenceListClose); } else { throw new TdlException(Current, "Expected a type, tag, or constraint specification"); } // either 1. we're done with all constraints if (!MoveNext()) return; // 2. we're done with a block grouped by [ x ] < y , > <! z , !> < x . y > originating from this base level... ttok = pp.CurType; if (tok_map[(int)ttok] == 1) return; // 3. there are additional features constrained from this base level... if (ttok == TdlTok.Type.Comma) { MoveNextThrow(); return; } // 4. or there are additional constraints on this type. VerifyTokenType(TdlTok.Type.Ampersand); MoveNext(); } throw new TdlException(Current, "Unexpected end of token stream while parsing TDL."); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// /// More convenient access to the parse position /// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// public TdlTok Current { get { return pp.Current; } } object System.Collections.IEnumerator.Current { get { return pp.Current; } } TdlTok MoveNextThrow(String error_msg = null) { return pp.MoveNextThrow(error_msg); } void VerifyTokenType(TdlTok.Type tt, String msg = null) { pp.VerifyTokenType(tt, msg); } public void Dispose() { pp.Dispose(); } public bool MoveNext() { return pp.MoveNext(); } public void Reset() { pp.Reset(); } public override String ToString() { if (pp.bfc == null) return ("(null parse position)"); String path = pp.bfc.Skip(Math.Max(pp.i - 1, 0)).Take(3).StringJoin(" "); String crf = corefs == null ? "(null)" : corefs.Count.ToString(); return String.Format("{0} \"… {1} …\" corefs: {2}", inst.Name, path, crf); } }; }