using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.IO;
using glue.Tokenization;
using glue.Debugging;
using glue.Extensions.Enumerable;
using glue.Collections.ReadOnly;
using glue.Collections.XSpinLock;

namespace agree
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// The 'count' of IParseChartToken items encapsulated in the LexicalAnalysis enumerator is *not* necessarily the
	/// required size of the parse chart, due to overlapping tokens or multiple tokens for a given chart position;
	/// use the ChartSize property for this.
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	[DebuggerDisplay("{ToString(),nq}")]
	public partial class LexicalAnalysis : IEnumerable<ParseChart.IParseChartToken>, IDisposable
	{
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		readonly TokenSet ts;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		readonly Lexicon lex;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		readonly Tray tray;
		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		readonly GrammarParser gp;

		readonly List<AnalysisStack> token_analyses = new List<AnalysisStack>();

		/// <summary>
		/// Because TfsEdges are referenced by multiple stacks within this LexicalAnalysis, we provide a method for their
		/// lifetime to be managed here. If a transform creates non-canonical TFSs, they are registered here to be
		/// cleaned up later. Canonical feature structures such as lexical rules should not be added.
		/// </summary>
		readonly List<TfsEdge> edges = new List<TfsEdge>();

		public Lexicon Lexicon { get { return lex; } }
		public TokenSet SourceItem { get { return ts; } }
		public GrammarParser Parser { get { return gp; } }

		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		public String SourceText { get { return ts.Source.Text; } }

		[DebuggerBrowsable(DebuggerBrowsableState.Never)]
		public int ChartSize { get { return ts.MinimalSpanCount; } }

		public int c_unif = 0;

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Performs pre-parsing analysis of the tokens in a tokenization hypothesis, including lexical and irregular
		/// stem lookup, application of morphological and lexical rules, filtering, etc. The hypothesis may be either
		/// single-coverage (i.e. 'TokenizationHypothesis') or ad-hoc, where token selection is deferred to the
		/// parsing phase (i.e. 'TokenizedString').
		///
		/// Morpholexical analysis happens in two passes. The surface-to-stem inbound pass ("downtrace") permutes
		/// affixation--regular or irregular--fanning downwards until one or more stems are reached. Then, for each
		/// discovered downtrace, an outbound pass follows the downtrace in reverse, checking the license requirements
		/// of the downtrace (aborting on failure), interleaving non-affixing rules where possible, and permuting the
		/// skipping of downtrace elements. Thus, at each node of the outbound pass, zero or more stacks fan out
		/// upwards. A worked trace of this two-pass scheme is sketched after the constructor below.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public LexicalAnalysis(Lexicon lex, TokenSet ts, Tray tray)
		{
			this.lex = lex;
			this.tray = tray;
			this.gp = lex.Grammar.Parser;
			this.ts = ts;

			/// Find zero or more analysis stacks for each source token and add them to a bag. Note that we operate mostly
			/// on the surface form alone, so the token 'tok' that is passed in is not referenced unless we need to check
			/// adjacency when postulating a multi-word stem. Otherwise the analyzer doesn't care how the tokens are laid
			/// out, overlapping, or gapped in the source, and they are returned as a bag that is, in principle, unordered.
			foreach (TokenSet.Token tok in ts)
			{
				DownTrace dt = new DownTraceSurface(this, tok.Text);
				token_analyses.AddRange(dt.TryNonAffixingRulesOutbound(tok));
				c_unif += dt.Sum(_dt => _dt.c_unif);
			}
#if DEBUG
			TextWriter tw = lex.Grammar.Parser.DebugOutput;
			if (tw != null)
			{
				int i = 0;
				foreach (AnalysisStack stk in token_analyses)
				{
					tw.WriteLineColorSync("$red analysis #{0}", i++);
					int j = stk.Count;
					foreach (LexicalTransform lx in stk.AsEnumerable().Reverse())
					{
						tw.WriteLineColorSync("\t{0,2} $darkyellow {1,-11}$ {2,19}\t{3,-20}",
							--j,
							"\"" + lx.CurrentForm.StringJoin(" ") + "\"",
							lx.GetType().Name,
							lx.license.Name);
					}
					tw.WriteLine();
				}
			}
#endif
		}
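		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		// Worked trace of the two-pass scheme (hypothetical grammar; illustration only, nothing here is executed).
		// Suppose the surface token is "dogs", a regular subrule rewrites final "s" to "" under a rule PLUR-NOUN, and
		// the lexicon has a stem entry for "dog":
		//
		//   inbound ("downtrace"):  "dogs" --PLUR-NOUN subrule--> "dog" --lexical lookup--> LexicalEntry for "dog"
		//
		//   outbound:               stack[0] = LxSingleTokenStem   ("dog", entry)
		//                           stack[1] = LxRegularAffixing   ("dogs", PLUR-NOUN, unified TFS)
		//
		// On the way back out, each node may additionally fan out copies of the stack with non-affixing lexical rules
		// interleaved, so a single surface token can contribute several AnalysisStacks to 'token_analyses'.
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////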
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// DownTrace : inverted tree of morphological transforms from the surface (top) towards the stem (leaves)
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		[DebuggerDisplay("{form}")]
		abstract class DownTrace : IEnumerable<DownTrace>
		{
			internal readonly LexicalAnalysis la;
			protected readonly DownTrace m_prev;
			public int c_unif = 0;
			readonly String form;

			public DownTrace(LexicalAnalysis la, DownTrace prev, String form)
			{
				this.la = la;
				this.m_prev = prev;
				this.form = form;
			}

			Lexicon Lexicon { get { return la.lex; } }
			Tray Tray { get { return la.tray; } }

			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// Stacks which permute all licensed orthographic changes are generated on the way down from the surface
			/// towards the stem, and recorded in an (inverted) tree of DownTrace objects. Here, when unwinding, non-affixing
			/// changes are inserted into these trees, possibly multiplying branches upwards at any point.
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			public IEnumerable<AnalysisStack> TryNonAffixingRulesOutbound(TokenSet.Token tok)
			{
				return OutboundMultiplier(TryAffixingRulesInbound(tok));
			}

			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// An arbitrary number of non-affixing rules can interleave. As long as we find any, multiply the returned
			/// stacks recursively upwards. (A standalone illustration of this fan-out shape follows below.)
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			IEnumerable<AnalysisStack> OutboundMultiplier(IEnumerable<AnalysisStack> seq)
			{
				foreach (AnalysisStack analysis_stack in seq)
				{
					foreach (var new_stk in OutboundMultiplier(FindNonAffixingRules(analysis_stack)))
						yield return new_stk;

					/// also return the stack without any non-affixing lexical rules applied
					/// todo: is this only valid if it matches the surface form?
					yield return analysis_stack;
				}
			}
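#if DEBUG
			// Illustration only (hypothetical helper; never called by the analyzer): the same recursive fan-out shape
			// as OutboundMultiplier above, demonstrated on plain strings. Each item yields all of its recursive
			// expansions before itself; termination relies on 'expand' eventually returning an empty sequence, just
			// as FindNonAffixingRules does once no further rule unifies. For example, expanding by appending "x"
			// while the length is under three turns { "a" } into the sequence "axx", "ax", "a".
			internal static IEnumerable<String> FanOutExample(IEnumerable<String> seq, Func<String, IEnumerable<String>> expand)
			{
				foreach (String s in seq)
				{
					foreach (String t in FanOutExample(expand(s), expand))
						yield return t;
					yield return s;		// also yield the item with no expansions applied
				}
			}
#endif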
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// Return new stacks formed by applying any valid non-affixing lexical rule to the top of the specified stack.
			/// If no further transformation can be generated on this stack, return an empty sequence.
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			IEnumerable<AnalysisStack> FindNonAffixingRules(AnalysisStack analysis_stack)
			{
				LexicalTransform lx = analysis_stack.Top;
				TfsEdge candidate = lx.feature_structure;

				IEnumerable<LexicalRule> rules_to_use;
				LexicalRule lxr = lx.license as LexicalRule;
				if (lxr != null)
					rules_to_use = lxr.CompatibleKeyMothers.OfExactType<LexicalRule>();
				else
					rules_to_use = Lexicon.non_morph_lexrules;

				foreach (LexicalRule lex_rule in rules_to_use)
				{
					/// try the unification
					Unification.Partial uh = new Unification.Partial(Tray);
					c_unif++;
					TfsEdge full, daughter = lex_rule.RuleDaughters[0];
					if (uh.UnifyAndComplete(lex_rule.Contents, daughter, candidate, out full))
					{
						la.edges.Add(full);

						/// we are outbound and need to fan out upwards, so make a copy of the stack below, add the
						/// new result, and return the "extra" result
						AnalysisStack newstk = new AnalysisStack(analysis_stack);
						newstk.Add(new LxNonAffixing(newstk.Top.CurrentForm, lex_rule, full));
						yield return newstk;
					}
				}
			}

			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// Return zero or more lexical analysis stacks for the specified surface form. Each lexical analysis stack that
			/// is gathered has exactly one stem, and is built from the stem (at index 0) "upwards." The source token that is
			/// supplied is advisory only, in the sense that its text does not reflect the current state of morphological
			/// processing. It is only used for matching the stem against multi-word lexical entries.
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			IEnumerable<AnalysisStack> TryAffixingRulesInbound(TokenSet.Token tok)
			{
				/// 0. The mere availability of an irregular form will block all regular affixing rules if
				/// irregular-forms-only-p is enabled.
				List<Lexicon.Irreg> rg_irreg_info = null;
				if (Lexicon.irreg_dict != null && !this.HasIrregular)
				{
					if (Lexicon.irreg_dict.TryGetValue(form, out rg_irreg_info))
						Debug.Assert(rg_irreg_info.Count > 0);
				}

				/// 1. Irregular affix (or non-affixing transform)
				/// At this point we are only allowing a single irregular inflection to be added to the downtrace, but
				/// this is now easy to reconfigure.
				if (rg_irreg_info != null)
				{
					foreach (Lexicon.Irreg irreg_info in rg_irreg_info)
					{
						LexicalRule lex_rule = irreg_info.rule;
						DownTrace dt = new DownTraceIrregular(this, irreg_info.stem);

						/// propagate downwards towards the stem first, and only unify with the stacks that come back, if any
						foreach (AnalysisStack analysis_stack in dt.TryNonAffixingRulesOutbound(tok))
						{
							/// rule check pre-filter
							LexicalRule lr = analysis_stack.Top.license as LexicalRule;
							if (lr == null || lr.CompatibleKeyMothers.Contains(lex_rule))
							{
								/// try the unification
								Unification.Partial uh = new Unification.Partial(Tray);
								c_unif++;
								TfsEdge full;
								if (uh.UnifyAndComplete(lex_rule.Contents, lex_rule.RuleDaughters[0], analysis_stack.Top.feature_structure, out full))
								{
									la.edges.Add(full);
									analysis_stack.Add(new LxIrregular(form, lex_rule, full));
									yield return analysis_stack;
								}
							}
						}
						c_unif += dt.c_unif;
					}
				}
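				/// Example (hypothetical irregulars entry; illustration only): a table row mapping surface "ate" to
				/// stem "eat" under a rule PAST-V-IRULE sends the downtrace down via "eat", where stem lookup (and
				/// possibly further rules) proceed; the outbound pass then re-applies PAST-V-IRULE by unification on
				/// top of each returned stack. When irregular-forms-only-p is enabled, the presence of that row also
				/// suppresses step 2 below for "ate".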
				/// 2. Regular, affixing lexical rules
				if (rg_irreg_info == null || !la.Parser.Config.IrregularFormsOnly)
				{
					foreach (MorphologicalRule morph_rule in Lexicon.morph_lexrules)
					{
						foreach (MorphologySubrule subrule in morph_rule.Subrules)
						{
							String newform = subrule.regex.Replace(form, subrule.replace);
							if (newform != form)
							{
								DownTrace dt = new DownTraceRegular(this, newform);

								/// propagate downwards towards the stem first, and only unify with the stacks that come back, if any
								foreach (AnalysisStack analysis_stack in dt.TryNonAffixingRulesOutbound(tok))
								{
									/// rule check pre-filter
									LexicalRule lr = analysis_stack.Top.license as LexicalRule;
									if (lr == null || lr.CompatibleKeyMothers.Contains(morph_rule))
									{
										/// try the unification
										Unification.Partial uh = new Unification.Partial(Tray);
										c_unif++;
										TfsEdge full;
										if (uh.UnifyAndComplete(morph_rule.Contents, morph_rule.RuleDaughters[0], analysis_stack.Top.feature_structure, out full))
										{
											la.edges.Add(full);
											analysis_stack.Add(new LxRegularAffixing(form, morph_rule, full));
											yield return analysis_stack;
										}
									}
								}
								c_unif += dt.c_unif;
							}
						}
					}
				}

				/// 3. Stem
				bool f_did_expand;
				TokenSet.Token[][] rg_seq = null;
				foreach (LexicalEntry le in Lexicon[form])
				{
					int lex_entry_arity = le.Lemmata.Count;
					if (lex_entry_arity > 1)
					{
						rg_seq = rg_seq ?? tok.RightAdjacencySequences().Select(ie => ie.ToArray()).ToArray();
						if (rg_seq.Length > 1)
						{
							Nop.CodeCoverage();
							// probably need to filter the sequences for distinctness here (array-value equality) since there may be dups after truncating
							//var yz = rg_seq.Where(rgt => rgt.Length >= lex_entry_arity - 1).Select(ie => ie.Take(lex_entry_arity - 1).ToArray()).ToArray();
							//if (yz.Distinct(rgt => rgt.StringJoin("")).Count() != yz.Length)
						}

						/// ensure that all parts of a multi-word lexical entry match an available adjacency
						/// sequence in the surface input
						foreach (var adj_seq in rg_seq)
						{
							/// the first token, which is not included in adj_seq, has already been checked
							if (1 + adj_seq.Length >= lex_entry_arity)
							{
								for (int i = 1; i < lex_entry_arity; i++)
									if (!Lexicon.TextComparer.Equals(adj_seq[i - 1].Text, le.Lemmata[i]))
										goto no_match;
								Span sp = new Span(tok.TokenSpan.StartIndex, adj_seq[lex_entry_arity - 2].TokenSpan.EndIndex);
								yield return new AnalysisStack(la, new LxMultiTokenStem(sp, le, out f_did_expand));
								if (f_did_expand)
									c_unif++;
							}
						no_match:
							;
						}
					}
					else
					{
						yield return new AnalysisStack(la, new LxSingleTokenStem(tok, le, out f_did_expand));
						if (f_did_expand)
							c_unif++;
					}
				}
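				/// Example (illustration only): for a two-lemma entry "ad hoc" (le.Lemmata = { "ad", "hoc" }) keyed on
				/// the token "ad", step 3 requires some right-adjacency sequence to continue with a token whose text
				/// matches "hoc"; the resulting LxMultiTokenStem then spans from the start of "ad" through the end of
				/// "hoc". A single-lemma entry skips the adjacency check and yields an LxSingleTokenStem for 'tok' alone.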
				/// 4. Check non-initial lemmata in multi-word lexemes (only if there is a spelling change; otherwise this
				/// is handled by step #3). Using HasSpellingChange means non-affixing irregulars are blocked here, so
				/// "world series" will not generate a stack for plural 'series' in this step. If the requirement were
				/// changed to HasTransform instead, the plural "world series" would be generated, but at the expense of
				/// also creating another singular analysis, duplicating the (presumably singular) one from step #3. The
				/// duplicates scenario seems like the worse problem.
				if (HasSpellingChange)
				{
					foreach (Lexicon.NonInitialMwe mwe in Lexicon.mwe_lookup[form])
					{
						LexicalEntry le = mwe.lex_entry;

						/// check to the left. There will always be at least one token.
						foreach (var adj_left in tok.LeftAdjacencySequences().Select(ie => ie.TakeLast(mwe.index).ToArray()).Where(rgt => rgt.Length == mwe.index))
						{
							for (int j = 0; j < mwe.index; j++)
								if (!Lexicon.TextComparer.Equals(adj_left[j].Text, le.Lemmata[j]))
									goto no_match;

							/// check to the right. There may be zero or more.
							int rem = le.Lemmata.Count - (mwe.index + 1);
							if (rem == 0)
							{
								Span sp = new Span(adj_left[0].TokenSpan.StartIndex, tok.TokenSpan.EndIndex);
								yield return new AnalysisStack(la, new LxMultiTokenStem(sp, le, out f_did_expand));
								if (f_did_expand)
									c_unif++;
							}
							else
							{
								foreach (var adj_right in tok.RightAdjacencySequences().Select(ie => ie.Take(rem).ToArray()).Where(rgt => rgt.Length == rem))
								{
									for (int j = 0; j < rem; j++)
										if (!Lexicon.TextComparer.Equals(adj_right[j].Text, le.Lemmata[mwe.index + 1 + j]))
											goto no_match;
									Nop.CodeCoverage();
									Span sp = new Span(adj_left[0].TokenSpan.StartIndex, adj_right[rem - 1].TokenSpan.EndIndex);
									yield return new AnalysisStack(la, new LxMultiTokenStem(sp, le, out f_did_expand));
									if (f_did_expand)
										c_unif++;
								}
							}
						no_match:
							;
						}
					}
				}
			}

			/// <summary>
			/// Are there any irregular inflections in this downtrace?
			/// </summary>
			public bool HasIrregular { get { return this.Any(dt => dt is DownTraceIrregular); } }

			/// <summary>
			/// Are there any morphological transforms (regular or irregular) in this downtrace?
			/// </summary>
			public bool HasTransform { get { return this.Any(dt => dt is DownTraceTransform); } }

			/// <summary>
			/// Are there any spelling changes in this downtrace?
			/// </summary>
			public bool HasSpellingChange { get { return this.Any(dt => dt.form != form); } }

			/// <summary>
			/// Each DownTrace is polymorphic with the linked list that it terminates: enumerating a node yields the path
			/// so far--in a tree of all such paths--from the surface token down to this node in the current analysis descent.
			/// </summary>
			public IEnumerator<DownTrace> GetEnumerator()
			{
				if (m_prev != null)
					foreach (DownTrace prev in m_prev)
						yield return prev;
				yield return this;
			}

			IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }

#if DEBUG
			[DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
			protected DownTrace[] _dbg_display { get { return this.ToArray(); } }
#endif
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// DownTrace implementations:
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		class DownTraceSurface : DownTrace
		{
			public DownTraceSurface(LexicalAnalysis la, String form)
				: base(la, null, form)
			{
			}
		};

		abstract class DownTraceTransform : DownTrace
		{
			public DownTraceTransform(DownTrace prev, String form)
				: base(prev.la, prev, form)
			{
			}
		};

		class DownTraceIrregular : DownTraceTransform
		{
			public DownTraceIrregular(DownTrace prev, String form)
				: base(prev, form)
			{
			}
		};

		class DownTraceRegular : DownTraceTransform
		{
			public DownTraceRegular(DownTrace prev, String form)
				: base(prev, form)
			{
			}
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// mundane:
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public void Dispose()
		{
			foreach (TfsEdge te in edges)
				te.Dispose();
			edges.Clear();
		}

		public IEnumerator<ParseChart.IParseChartToken> GetEnumerator()
		{
			foreach (var ipce in token_analyses)
				yield return ipce;
		}

		IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); }

		public override string ToString()
		{
			return String.Format("tokens: {0} chart size: {1} [{2}]", token_analyses.Count, ChartSize, ts.Source.Text);
		}

#if DEBUG
		[DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
		public AnalysisStack[] _RGTOKA { get { return token_analyses.ToArray(); } }
#endif
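#if DEBUG
		// Usage sketch (hypothetical driver code; the chart API shown here is assumed, not defined in this file):
		//
		//   using (var la = new LexicalAnalysis(lex, ts, tray))
		//   {
		//       var chart = new ParseChart(la.ChartSize);          // size from ChartSize...
		//       foreach (ParseChart.IParseChartToken tok in la)    // ...not from the enumeration count
		//           chart.Add(tok);
		//   }                                                      // Dispose() releases the registered non-canonical TfsEdges
		//
		/// <summary>
		/// Illustration only (hypothetical helper; not part of the parsing pipeline). Demonstrates the distinction
		/// called out in the class summary: with overlapping tokens, the number of enumerated chart tokens may exceed
		/// ChartSize, so charts must be sized from ChartSize.
		/// </summary>
		internal static String _dbg_describe(LexicalAnalysis la)
		{
			return String.Format("{0} chart token(s) over {1} chart position(s)", la.Count(), la.ChartSize);
		}
#endif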
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// AnalysisStacks are built from the bottom up only, so you must supply the stem here in order to create
		/// a stack. There must be exactly one stem.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		[DebuggerDisplay("{ToString(),nq}")]
		public class AnalysisStack : List<LexicalTransform>, ParseChart.IParseChartToken
		{
			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			readonly LexicalAnalysis analysis;
			readonly Tray tray;
			readonly Lexicon lex;

			public AnalysisStack(LexicalAnalysis analysis, LxStem mx)
			{
				this.analysis = analysis;
				this.tray = analysis.tray;
				this.lex = analysis.lex;
				base.Add(mx);
			}

			public AnalysisStack(AnalysisStack to_copy)
			{
				/// copy the environment along with the transform list
				this.analysis = to_copy.analysis;
				this.tray = to_copy.tray;
				this.lex = to_copy.lex;
				this.AddRange(to_copy);
			}

			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			public LxStem Stem
			{
				get
				{
					Debug.Assert(this[0] is LxStem);
					return (LxStem)this[0];
				}
			}

			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			public LexicalTransform Top { get { return this[this.Count - 1]; } }

			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			public TfsEdge Contents { get { return Top.feature_structure; } }

			[DebuggerBrowsable(DebuggerBrowsableState.Never)]
			public Span ChartSpan { get { return Stem.ChartSpan; } }

			public string Text { get { return analysis.SourceItem.Source.MinimalSpanText(ChartSpan); } }

			public bool SpinCompare(TfsEdge other) { return this.Contents.Type == other.Type; }

			public void Dispose() { }

			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// IAtomicallySequencable (ParseChart.IParseChartToken) implementation.
			/// To be allowed into the parse chart, you must be able to participate in sequence-list atomic stamping.
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			int i_seq = unchecked((int)uint.MaxValue);	// should be ast.InertValue

			public uint SequenceId { get { return (uint)i_seq; } }
			public void StampSequence(AtomicSequencer ast) { ast.StampLocation(ref i_seq); }
			public bool SetInert(AtomicSequencer ast) { return ast.SetInert(ref i_seq); }
			public bool IsInert(AtomicSequencer ast) { return (uint)i_seq == ast.InertValue; }

			public ISysObj License { get { return Top.license; } }

			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>
			/// Diagnostic display: the chart span, the chain of transforms from stem to top, and the type of the
			/// topmost feature structure.
			/// </summary>
			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			public override string ToString()
			{
				//return String.Format("{0} transforms: {1} stem: {2} fs: {3}", this.ChartSpan, this.Count, this.Stem, this.Self);
				return String.Format("{0} {1} {2}", this.ChartSpan, this.StringJoin("->"), this.Contents.Type.Name);
			}

#if DEBUG
			[DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
			public LexicalTransform[] _TRANSFORMS { get { return this.AsEnumerable().Reverse().ToArray(); } }
#endif
		};
	};
}