using System;
using System.Diagnostics;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

using miew.String;
using miew.Enumerable;

namespace agree
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// Error position information in a TDL file
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public class ErrorPos
	{
		public ErrorPos(int l, int c, String f)
		{
			line = l;
			col = c;
			file = f;
		}

		public int line;
		public int col;
		public String file;
	}


	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// TDL token
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public class TdlTok : ErrorPos
	{
		static public byte[] TokMap_Empty;
		static public byte[] TokMap_Comma;
		static public byte[] TokMap_SqCl;
		static public byte[] TokMap_AngCl;
		static public byte[] TokMap_AngCl_Comma;
		static public byte[] TokMap_AngCl_Comma_Dot;
		static public byte[] TokMap_DlsCl_Comma;

		static int c_enum;
		static TdlTok()
		{
			c_enum = Enum.GetValues(typeof(TdlTok.Type)).Cast<int>().Max() + 1;

			TokMap_Empty = TdlTok.CreateTokenMap();
			TokMap_SqCl = TdlTok.CreateTokenMap(TdlTok.Type.SquareClose);
			TokMap_Comma = TdlTok.CreateTokenMap(TdlTok.Type.Comma);
			TokMap_AngCl = TdlTok.CreateTokenMap(TdlTok.Type.AngleClose);
			TokMap_AngCl_Comma = TdlTok.CreateTokenMap(TdlTok.Type.Comma, TdlTok.Type.AngleClose);
			TokMap_AngCl_Comma_Dot = TdlTok.CreateTokenMap(TdlTok.Type.Comma, TdlTok.Type.AngleClose, TdlTok.Type.Dot);
			TokMap_DlsCl_Comma = TdlTok.CreateTokenMap(TdlTok.Type.Comma, TdlTok.Type.DifferenceListClose);
		}

		static public byte[] CreateTokenMap(params Type[] args)
		{
			byte[] tok_map = Enumerable.Repeat<byte>(0, c_enum).ToArray();
			foreach (Type t in args)
				tok_map[(int)t] = 1;
			return tok_map;
		}

		public enum Type
		{
			Identifier = 1,
			String,
			Tag,					// #...
			// single character tokens
			Ampersand,				// &
			Dot,					// .
			Comma,					// ,
			SquareOpen,				// [
			SquareClose,			// ]
			AngleOpen,				// <
			AngleClose,				// >
			// atomic 2-character tokens (can contain whitespace because of matrix.tdl)
			Append,					// :+
			Define,					// :=   : =   :<
			DifferenceListOpen,		// <!   < !
			DifferenceListClose,	// !>   ! >
			Ellipsis,				// ...
			// morphology specification also uses inherited type
			Morphology,
		};

		public TdlTok(int l, int c, String f, Type t)
			: base(l, c, f)
		{
			this.t = t;
		}

		public TdlTok(int l, int c, String f, Type t, String s)
			: base(l, c, f)
		{
			this.t = t;
			this.i_s = String.Intern(s);
		}


		readonly public Type t;
		public String i_s;
		public int c_parts;

		public String _Debug()
		{
			return String.Format("{0} {1} {2} {3} {4}", file, line, col, t.ToString(), i_s == null ? String.Empty : i_s);
		}

		public override String ToString()
		{
			String s = t.Render();
			if (s != null)
				return s;
			if (this is TdlTokPrefix)
				return " %prefix " + i_s + " ";
			if (this is TdlTokSuffix)
				return " %suffix " + i_s + " ";
			switch (t)
			{
				case TdlTok.Type.Identifier:
					return " " + i_s + " ";
				case TdlTok.Type.String:
					return " \"" + i_s + "\" ";
				case TdlTok.Type.Tag:
					return " #" + i_s + " ";
				default:
					throw new Exception();
			}
		}
	};

	[Serializable]
	[DebuggerDisplay("{ToString(),nq}")]
	public struct MorphologySubrule
	{
		public Regex regex;
		public String replace;
		public override string ToString()
		{
			return String.Format("{0} {1}", regex, replace);
		}
	};

	abstract class TdlTokMorphology : TdlTok
	{
		public TdlTokMorphology(int l, int c, String f)
			: base(l, c, f, Type.Morphology)
		{
		}
		public List<MorphologySubrule> subrules = new List<MorphologySubrule>();
	};

	class TdlTokPrefix : TdlTokMorphology
	{
		public TdlTokPrefix(int l, int c, String f) : base(l, c, f) { }
	};

	class TdlTokSuffix : TdlTokMorphology
	{
		public TdlTokSuffix(int l, int c, String f) : base(l, c, f) { }

	};

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///
	/// TDL token extension function(s)
	///
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public static class TdlTokExt
	{
		public static String Render(this TdlTok.Type t)
		{
			switch (t)
			{
				case TdlTok.Type.Ampersand:
					return "&";
				case TdlTok.Type.Dot:
					return ".";
				case TdlTok.Type.Comma:
					return ",";
				case TdlTok.Type.SquareOpen:
					return "[";
				case TdlTok.Type.SquareClose:
					return "]";
				case TdlTok.Type.AngleOpen:
					return "<";
				case TdlTok.Type.AngleClose:
					return ">";
				case TdlTok.Type.Append:
					return ":+";
				case TdlTok.Type.Define:
					return ":=";			// also, :<
				case TdlTok.Type.DifferenceListOpen:
					return "<!";
				case TdlTok.Type.DifferenceListClose:
					return "!>";
				case TdlTok.Type.Ellipsis:
					return "...";
				default:
					return null;
			}
		}
	};

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///
	/// Base Feature or Base Constraint
	///
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	[DebuggerDisplay("{ToString(),nq}")]
	public class BaseFeatConstraint : List<TdlTok>
	{
		public override String ToString()
		{
			return this.StringJoin(" ");
		}
	};

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///
	/// Token Definition Group
	///
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public class TokenDefinitionGroup
	{
		public enum Type
		{
			Tdl,
			GrammarRule,
			LexicalRule,
			Lexicon,
			Label,
			Start,
		};

		public TokenDefinitionGroup(Type type, TdlTok _t)
		{
			this.type = type;
			this.tok_ident = _t;
		}
		/*readonly*/
		public Type type;

		readonly public TdlTok tok_ident;	// identifier

		public bool f_append;		// false for :=  true for :+

		public List<TdlTok> conj_par = new List<TdlTok>();	// conjoined parents

		public List<BaseFeatConstraint> m_bfc;	// contents of feature structure(s)

		public String top_comment;		// conjoined top-level comment

		public List<MorphologySubrule> morph_subrules;	// morphology subrules

		public String Render()
		{
			String ret = String.Format("Type:\t{0}\n", tok_ident.i_s);
			if (conj_par != null && conj_par.Count > 0)
				ret += String.Format("Parents:\t{0}\n", conj_par.Select(e => e.i_s).StringJoin(" "));
			if (m_bfc != null)
			{
				ret += "Constraint:\n\t";
				ret += m_bfc;
			}
			if (top_comment != null)
				ret += String.Format("Comment:\t{0}\n", top_comment);
			return ret;
		}

		public String Identifier { get { return tok_ident.i_s; } }
	};


	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	/// <summary>
	/// Helper class for assembling a stream of TDL tokens into token definition groups
	/// </summary>
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	class TokenGrouper : IEnumerable<TokenDefinitionGroup>
	{
		readonly static Char[] one_space = new Char[] { ' ' };

		public TokenGrouper(
			TokenDefinitionGroup.Type type,
			IEnumerable<TdlTok> ie)
		{
			this.type = type;
			this.ie = ie;
		}
		readonly TokenDefinitionGroup.Type type;
		readonly IEnumerable<TdlTok> ie;

		IEnumerator<TdlTok> iett;
		TdlTok prv;
		TdlTok cur;
		bool f_inhibit_next_advance;
		Stack<TdlTok> nest;

		bool Advance()
		{
			if (f_inhibit_next_advance)
				f_inhibit_next_advance = false;
			else if (!iett.MoveNext())
				return false;
			prv = cur;
			cur = iett.Current;
			return true;
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>
		/// Groups tokens from a token stream into zero or more token definition groups.
		/// </summary>
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public IEnumerator<TokenDefinitionGroup> GetEnumerator()
		{
			prv = cur = default(TdlTok);
			f_inhibit_next_advance = default(bool);
			iett = ie.GetEnumerator();
			nest = new Stack<TdlTok>();

			while (Advance())
			{
				TokenDefinitionGroup tdg = new TokenDefinitionGroup(type, cur);

				if (tdg.tok_ident.t != TdlTok.Type.Identifier)
					TdlTokenizer.ErrorExit(tdg.tok_ident, "Expected an identifier.");

				if (!Advance())
					TdlTokenizer.ErrorExit(tdg.tok_ident, "Unexpected end of file.");

				if (cur.t != TdlTok.Type.Define && cur.t != TdlTok.Type.Append)
					TdlTokenizer.ErrorExit(cur, "Expected := or :+");

				tdg.f_append = (cur.t == TdlTok.Type.Append);

				if (!Advance())
					TdlTokenizer.ErrorExit(cur, "Unexpected end of file in type definition.");

				while (cur.t == TdlTok.Type.Morphology)
				{
					/// Although supposedly forbidden by the LKB docs (Copestake 2002, p.131) some grammars get away with
					/// using morphology specifications in LexicalRule files.
					if (type != TokenDefinitionGroup.Type.LexicalRule)
						TdlTokenizer.ErrorExit(cur, "Cannot specify inflection rules in this file.");

					tdg.morph_subrules = tdg.morph_subrules ?? new List<MorphologySubrule>();
					tdg.morph_subrules.AddRange((cur as TdlTokMorphology).subrules);

					if (!Advance())
						TdlTokenizer.ErrorExit(cur, "Unexpected end of file after '%prefix' or '%suffix' in inflectional rule definition.");
				}

				// body of the definition
				nest.Clear();
				while (true)
				{
					if (cur.t == TdlTok.Type.SquareOpen)
					{
						tdg.m_bfc = tdg.m_bfc ?? new List<BaseFeatConstraint>();
						BaseFeatConstraint bfc_cur = new BaseFeatConstraint();
						nest.Push(cur);
						while (true)
						{
							if (!Advance())
								TdlTokenizer.ErrorExit(cur, "End of file searching for ']' to terminate feature structure. Unmatched '['.");

							if (cur.t == TdlTok.Type.Comma)
							{
								// count number of comma-separated items in each [ ... ] group
								nest.Peek().c_parts++;	// could be '<', '<!', '['

								if (nest.Count == 1)	// must be '['
								{
									tdg.m_bfc.Add(bfc_cur);
									bfc_cur = new BaseFeatConstraint();
									continue;
								}
							}
							else if (cur.t == TdlTok.Type.Dot)
							{
								TdlTok pk = nest.Peek();
								if (pk.t == TdlTok.Type.AngleOpen)
								{
									if (pk.c_parts > 0)
										TdlTokenizer.ErrorExit(cur, "Dotted pair notation < a . b > cannot have more than two parts.");
									pk.c_parts++;
								}
							}
							else if (cur.t == TdlTok.Type.AngleOpen || cur.t == TdlTok.Type.DifferenceListOpen || cur.t == TdlTok.Type.SquareOpen)
								nest.Push(cur);
							else if (cur.t == TdlTok.Type.AngleClose || cur.t == TdlTok.Type.DifferenceListClose || cur.t == TdlTok.Type.SquareClose)
							{
								if (nest.Count == 0)
									TdlTokenizer.ErrorExit(cur, "List not open; '{0}' is invalid.", cur);

								// the pending (open) grouping token
								TdlTok actual_open = nest.Pop();

								// hacky way to get the expected closing token corresponding to the actual opening token
								TdlTok.Type expected_close = (TdlTok.Type)(actual_open.t + 1);

								// check that the opening token for the encountered closing token matches the opening token for the
								// pending grouping type
								if (cur.t != expected_close)
									TdlTokenizer.ErrorExit(cur, "Invalid nesting '{0}', expected {1}.", cur, expected_close.ToString());

								// disallow empty parts
								if (prv.t == TdlTok.Type.Comma)
									TdlTokenizer.ErrorExit(cur, "Invalid ',', list {0} {1} cannot have empty parts.", actual_open, cur);

								// We counted zero commas in < >, which has zero parts, and we 
								// counted (e.g.) n commas in < a , b > which has n+1 parts.
								// Furthermore, < a, b, ... > will be treated as having only two parts.
								if (prv.t != actual_open.t && (actual_open.t != TdlTok.Type.AngleOpen || prv.t != TdlTok.Type.Ellipsis))
									actual_open.c_parts++;

								if (nest.Count == 0 && cur.t == TdlTok.Type.SquareClose)
								{
									tdg.m_bfc.Add(bfc_cur);
									break;
								}
							}

							bfc_cur.Add(cur);
						}
					}
					else if (cur.t == TdlTok.Type.String)
					{
						tdg.top_comment = cur.i_s;
					}
					else if (cur.t == TdlTok.Type.Identifier)
					{
						tdg.conj_par.Add(cur);
					}
					else if (cur.t == TdlTok.Type.Tag)
					{
						TdlTokenizer.ErrorExit(cur, String.Format("Cannot conjoin a co-reference tag (#{0}) at the top level of a TDL definition.", cur.i_s));
					}
					else if (cur.t == TdlTok.Type.Morphology)
					{
						TdlTokenizer.ErrorExit(cur, String.Format("Inflectional prefix or suffix must occur after ':=' and before any parent types.", cur.i_s));
					}
					else
						TdlTokenizer.ErrorExit(cur, String.Format("Unexpected token '{0}'.", cur));

					if (!Advance())
						TdlTokenizer.ErrorExit(cur, "Unexpected end of file in type definition.");

					if (cur.t == TdlTok.Type.Dot)	// all TDL definitions must be terminated with a period
					{
						yield return tdg;
						break;
					}
					else if (cur.t == TdlTok.Type.SquareOpen)
						f_inhibit_next_advance = true;	// LKB BUG -- allows feature structure without conjunction
					else if (cur.t != TdlTok.Type.Ampersand)
						TdlTokenizer.ErrorExit(cur, String.Format("'{0}' is not valid here. Expected '&' or '.'", cur));

					if (!Advance())
						TdlTokenizer.ErrorExit(cur, "Unexpected end of file in type definition.");
				}
			}
		}

		System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
		{
			throw new NotImplementedException();
		}
	};

	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	//
	//
	//
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public struct TdlParsePos : IEnumerator<TdlTok>
	{
		public TdlParsePos(BaseFeatConstraint bfc)
		{
			this.bfc = bfc;
			this.i = -1;
		}

		readonly public BaseFeatConstraint bfc;
		// todo: ref pp, private i
		public int i;

		public bool Eof { get { return i >= bfc.Count; } }
		public TdlTok Current { get { return bfc[i]; } }
		public TdlTok.Type CurType { get { return bfc[i].t; } }
		public String CurString { get { return bfc[i].i_s; } }
		public bool MoveNext()
		{
			if (i < bfc.Count)
				i++;
			return i < bfc.Count;
		}
		public TdlTok MoveNextThrow(String error_msg = null)
		{
			if (error_msg == null)
				error_msg = "Incomplete TDL constraint specification.";
			if (i < bfc.Count)
				return bfc[++i];
			else
				throw new TdlException(bfc[bfc.Count - 1], error_msg);
		}
		public void VerifyTokenType(TdlTok.Type tt, String msg = null)
		{
			if (msg == null)
				msg = String.Format("Expected '{0}'.", tt.Render() ?? "??");
			if (CurType != tt)
				throw new TdlException(Current, msg);
		}

		public bool SkipToType(TdlTok.Type t)
		{
			while (true)
			{
				if (i >= bfc.Count)
					return false;
				if (bfc[i].t == t)
					return true;
				i++;
			}
		}

		public override String ToString()
		{
			if (bfc == null)
				return "(m_bfc == null)";
			return bfc.Skip(i).StringJoin(" ");
		}

		public void Dispose() { }

		public void Reset() { i = -1; }

		object System.Collections.IEnumerator.Current { get { return Current; } }
	};
}