using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace miew.Trie
{
	using String = System.String;

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///
	///
	///
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public class Trie<TValue> : System.Collections.IEnumerable, IEnumerable<Trie<TValue>.TrieNodeBase>
	{
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		///
		/// Tested total Trie memory usage with and without base class and virtual functions; it made no difference.
		///
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if (DEBUG)
		[DebuggerDisplay("{parent_char}")]
#endif
		public abstract class TrieNodeBase
		{
#if (DEBUG)
			public static Trie<TValue> trie;
			public Char parent_char;
			public String _key()
			{
				if (trie.Root == this)
					return "root";
				return trie.GetKey(this);
			}
#endif
			protected TValue m_value = default(TValue);

			public TValue Value
			{
				get { return m_value; }
				set { m_value = value; }
			}

			public bool HasValue { get { return !Object.Equals(m_value, default(TValue)); } }
			public abstract bool IsLeaf { get; }

			public abstract TrieNodeBase this[char c] { get; }

			public abstract TrieNodeBase[] Nodes { get; }

			public abstract void SetLeaf();

			public abstract int ChildCount { get; }

			public abstract bool ShouldOptimize { get; }

			public abstract KeyValuePair<Char, TrieNodeBase>[] CharNodePairs();

			public abstract TrieNodeBase AddChild(char c, ref int node_count);

			/// <summary>
			/// Includes current node value
			/// </summary>
			/// <returns></returns>
			public IEnumerable<TValue> SubsumedValues()
			{
				if (Value != null)
					yield return Value;
				if (Nodes != null)
					foreach (TrieNodeBase child in Nodes)
						if (child != null)
							foreach (TValue t in child.SubsumedValues())
								yield return t;
			}

			/// <summary>
			/// Includes current node
			/// </summary>
			/// <returns></returns>
			public IEnumerable<TrieNodeBase> SubsumedNodes()
			{
				yield return this;
				if (Nodes != null)
					foreach (TrieNodeBase child in Nodes)
						if (child != null)
							foreach (TrieNodeBase n in child.SubsumedNodes())
								yield return n;
			}

			/// <summary>
			/// Doesn't include current node
			/// </summary>
			/// <returns></returns>
			public IEnumerable<TrieNodeBase> SubsumedNodesExceptThis()
			{
				if (Nodes != null)
					foreach (TrieNodeBase child in Nodes)
						if (child != null)
							foreach (TrieNodeBase n in child.SubsumedNodes())
								yield return n;
			}

			/// <summary>
			/// Note: doesn't de-optimize optimized nodes if re-run later
			/// </summary>
			public void OptimizeChildNodes()
			{
				if (Nodes != null)
					foreach (var q in CharNodePairs())
					{
						TrieNodeBase n_old = q.Value;
						if (n_old.ShouldOptimize)
						{
							TrieNodeBase n_new = new SparseTrieNode(n_old.CharNodePairs());
							n_new.m_value = n_old.m_value;
#if (DEBUG)
							n_new.parent_char = n_old.parent_char;
#endif
							//Trie<TValue>.c_sparse_nodes++;
							ReplaceChild(q.Key, n_new);
						}
						n_old.OptimizeChildNodes();
					}
			}

			public abstract void ReplaceChild(Char c, TrieNodeBase n);

		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		///
		/// Sparse Trie Node
		///
		/// currently, this one's "nodes" value is never null, because we leave leaf nodes as the non-sparse type,
		/// (with nodes==null) and they currently never get converted back. Consequently, IsLeaf should always be 'false'.
		/// However, we're gonna do the check anyway.
		/// 
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public class SparseTrieNode : TrieNodeBase
		{
			Dictionary<Char, TrieNodeBase> d;

			public SparseTrieNode(IEnumerable<KeyValuePair<Char, TrieNodeBase>> ie)
			{
				d = new Dictionary<char, TrieNodeBase>();
				foreach (var kvp in ie)
					d.Add(kvp.Key, kvp.Value);
			}

			public override TrieNodeBase this[Char c]
			{
				get
				{
					TrieNodeBase node;
					return d.TryGetValue(c, out node) ? node : null;
				}
			}

			public override TrieNodeBase[] Nodes { get { return d.Values.ToArray(); } }

			/// <summary>
			/// do not use in current form. This means, run OptimizeSparseNodes *after* any pruning
			/// </summary>
			public override void SetLeaf() { d = null; }

			public override int ChildCount { get { return d.Count; } }

			public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
			{
				return d.ToArray();
			}

			public override TrieNodeBase AddChild(char c, ref int node_count)
			{
				TrieNodeBase node;
				if (!d.TryGetValue(c, out node))
				{
					node = new TrieNode();
					node_count++;
#if (DEBUG)
					node.parent_char = c;
#endif
					d.Add(c, node);
				}
				return node;
			}

			public override void ReplaceChild(Char c, TrieNodeBase n)
			{
#if (DEBUG)
				if (!d.ContainsKey(c))
					throw new Exception();
#endif
				d[c] = n;
			}

			public override bool ShouldOptimize { get { return false; } }
			public override bool IsLeaf { get { return d == null; } }

		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		///
		/// Non-sparse Trie Node
		///
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public class TrieNode : TrieNodeBase
		{
			private TrieNodeBase[] nodes = null;
			private Char m_base;

			public override int ChildCount { get { return (nodes != null) ? nodes.Count(e => e != null) : 0; } }
			public int AllocatedChildCount { get { return (nodes != null) ? nodes.Length : 0; } }

			public override TrieNodeBase[] Nodes { get { return nodes; } }

			public override void SetLeaf() { nodes = null; }

			public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
			{
				KeyValuePair<Char, TrieNodeBase>[] rg = new KeyValuePair<char, TrieNodeBase>[ChildCount];
				Char ch = m_base;
				int i = 0;
				foreach (TrieNodeBase child in nodes)
				{
					if (child != null)
						rg[i++] = new KeyValuePair<char, TrieNodeBase>(ch, child);
					ch++;
				}
				return rg;
			}

			public override TrieNodeBase this[char c]
			{
				get
				{
					if (nodes != null && m_base <= c && c < m_base + nodes.Length)
						return nodes[c - m_base];
					return null;
				}
			}

			public override TrieNodeBase AddChild(char c, ref int node_count)
			{
				if (nodes == null)
				{
					m_base = c;
					nodes = new TrieNodeBase[1];
				}
				else if (c >= m_base + nodes.Length)
				{
					System.Array.Resize(ref nodes, c - m_base + 1);
				}
				else if (c < m_base)
				{
					Char c_new = (Char)(m_base - c);
					TrieNodeBase[] tmp = new TrieNodeBase[nodes.Length + c_new];
					nodes.CopyTo(tmp, c_new);
					m_base = c;
					nodes = tmp;
				}

				TrieNodeBase node = nodes[c - m_base];
				if (node == null)
				{
					node = new TrieNode();
					node_count++;
#if (DEBUG)
					node.parent_char = c;
#endif
					nodes[c - m_base] = node;
				}
				return node;
			}

			public override void ReplaceChild(Char c, TrieNodeBase n)
			{
				if (nodes == null || c >= m_base + nodes.Length || c < m_base)
					throw new Exception();
				nodes[c - m_base] = n;
			}

			public override bool ShouldOptimize
			{
				get
				{
					if (nodes == null)
						return false;
					return (ChildCount * 9 < nodes.Length);		// empirically determined optimal value (space & time)
				}
			}

			public override bool IsLeaf { get { return nodes == null; } }
		};

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		/// 
		/// Trie proper begins here
		///
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

		private TrieNodeBase _root = new TrieNode();
		public int c_nodes = 0;
		//public static int c_sparse_nodes = 0;

#if (DEBUG)
		public Trie()
		{
			TrieNode.trie = this;
		}
#endif

		// in combination with Add(...), enables C# 3.0 initialization syntax, even though it never seems to call it
		public System.Collections.IEnumerator GetEnumerator()
		{
			return _root.SubsumedNodes().GetEnumerator();
		}

		IEnumerator<TrieNodeBase> IEnumerable<TrieNodeBase>.GetEnumerator()
		{
			return _root.SubsumedNodes().GetEnumerator();
		}

		public IEnumerable<TValue> Values { get { return _root.SubsumedValues(); } }

		public void OptimizeSparseNodes()
		{
			if (_root.ShouldOptimize)
			{
				_root = new SparseTrieNode(_root.CharNodePairs());
				//c_sparse_nodes++;
			}
			_root.OptimizeChildNodes();
		}

		public TrieNodeBase Root { get { return _root; } }

		public TrieNodeBase Add(String s, TValue v)
		{
			TrieNodeBase node = _root;
			foreach (Char c in s)
				node = node.AddChild(c, ref c_nodes);

			node.Value = v;
			return node;
		}

		public bool Contains(String s)
		{
			TrieNodeBase node = _root;
			foreach (Char c in s)
			{
				node = node[c];
				if (node == null)
					return false;
			}
			return node.HasValue;
		}

		/// <summary>
		/// Debug only; this is hideously inefficient
		/// </summary>
		public String GetKey(TrieNodeBase seek)
		{
			List<Char> sofar = new List<Char>();

			GetKeyHelper fn = null;
			fn = (TrieNodeBase cur) =>
			{
				sofar.Add(' ');	// placeholder
				foreach (var kvp in cur.CharNodePairs())
				{
					sofar[sofar.Count - 1] = kvp.Key;
					if (kvp.Value == seek)
						return true;
					if (kvp.Value.Nodes != null && fn(kvp.Value))
						return true;
				}
				sofar.RemoveAt(sofar.Count - 1);
				return false;
			};

			if (fn(_root))
				return new String(sofar.ToArray());
			return null;
		}


		/// <summary>
		/// Debug only; this is hideously inefficient
		/// </summary>
		delegate bool GetKeyHelper(TrieNodeBase cur);
		public String GetKey(TValue seek)
		{
			List<Char> sofar = new List<Char>();

			GetKeyHelper fn = null;
			fn = (TrieNodeBase cur) =>
			{
				sofar.Add(' ');	// placeholder
				foreach (var kvp in cur.CharNodePairs())
				{
					sofar[sofar.Count - 1] = kvp.Key;
					if (kvp.Value.Value != null && kvp.Value.Value.Equals(seek))
						return true;
					if (kvp.Value.Nodes != null && fn(kvp.Value))
						return true;
				}
				sofar.RemoveAt(sofar.Count - 1);
				return false;
			};

			if (fn(_root))
				return new String(sofar.ToArray());
			return null;
		}

#if false
		public List<String> FindMask(String mask)
		{
			return FindMask(mask, _root);
		}

		private List<String> FindMask(String mask, TrieNode<TValue> node)
		{
			char c = mask[0];
			mask = mask.Substring(1);
			List<String> list = new List<String>();
			if (c == '*')
			{
				c = node.Base;
				foreach (TrieNode<TValue> child in node.Nodes)
				{
					if (child != null)
					{
						if (mask.Length == 0)
						{
							if (child.IsEnd)
								list.Add(c.ToString());
						}
						else
						{
							foreach (String s in FindMask(mask, child))
								list.Add(c.ToString() + s);
						}
					}
					c++;
				}
			}
			else
			{
				TrieNode<TValue> child = node[c];
				if (child != null)
				{
					if (mask.Length == 0)
					{
						if (child.IsEnd)
							list.Add(c.ToString());
					}
					else
					{
						foreach (String s in FindMask(mask, child))
							list.Add(c.ToString() + s);
					}
				}
			}
			return list;
		}
#endif

		public TrieNodeBase FindNode(String s_in)
		{
			TrieNodeBase node = _root;
			foreach (Char c in s_in)
				if ((node = node[c]) == null)
					return null;
			return node;
		}

		/// <summary>
		/// If continuation from the terminal node is possible with a different input string, then that node is not
		/// returned as a 'last' node for the given input. In other words, 'last' nodes must be leaf nodes, where
		/// continuation possibility is truly unknown. The presense of a nodes array that we couldn't match to 
		/// means the search fails; it is not the design of the 'OrLast' feature to provide 'closest' or 'best'
		/// matching but rather to enable truncated tails still in the context of exact prefix matching.
		/// </summary>
		public TrieNodeBase FindNodeOrLast(String s_in, out bool f_exact)
		{
			TrieNodeBase node = _root;
			foreach (Char c in s_in)
			{
				if (node.IsLeaf)
				{
					f_exact = false;
					return node;
				}
				if ((node = node[c]) == null)
				{
					f_exact = false;
					return null;
				}
			}
			f_exact = true;
			return node;
		}

		// even though I found some articles that attest that using a foreach enumerator with arrays (and Lists)
		// returns a value type, thus avoiding spurious garbage, I had already changed the code to not use enumerator.
		public unsafe TValue Find(String s_in)
		{
			TrieNodeBase node = _root;
			fixed (Char* pin_s = s_in)
			{
				Char* p = pin_s;
				Char* p_end = p + s_in.Length;
				while (p < p_end)
				{
					if ((node = node[*p]) == null)
						return default(TValue);
					p++;
				}
				return node.Value;
			}
		}

		public unsafe TValue Find(Char* p_tag, int cb_ctag)
		{
			TrieNodeBase node = _root;
			Char* p_end = p_tag + cb_ctag;
			while (p_tag < p_end)
			{
				if ((node = node[*p_tag]) == null)
					return default(TValue);
				p_tag++;
			}
			return node.Value;
		}

		public IEnumerable<TValue> FindAll(String s_in)
		{
			TrieNodeBase node = _root;
			foreach (Char c in s_in)
			{
				if ((node = node[c]) == null)
					break;
				if (node.Value != null)
					yield return node.Value;
			}
		}

		public IEnumerable<TValue> SubsumedValues(String s)
		{
			TrieNodeBase node = FindNode(s);
			if (node == null)
				return System.Linq.Enumerable.Empty<TValue>();
			return node.SubsumedValues();
		}

		public IEnumerable<TrieNodeBase> SubsumedNodes(String s)
		{
			TrieNodeBase node = FindNode(s);
			if (node == null)
				return System.Linq.Enumerable.Empty<TrieNodeBase>();
			return node.SubsumedNodes();
		}

		public IEnumerable<TValue> AllSubstringValues(String s)
		{
			int i_cur = 0;
			while (i_cur < s.Length)
			{
				TrieNodeBase node = _root;
				int i = i_cur;
				while (i < s.Length)
				{
					node = node[s[i]];
					if (node == null)
						break;
					if (node.Value != null)
						yield return node.Value;
					i++;
				}
				i_cur++;
			}
		}

		/// <summary>
		/// note: only returns nodes with non-null values
		/// </summary>
		public void DepthFirstTraverse(Action<String, TrieNodeBase> callback)
		{
			Char[] rgch = new Char[100];
			int depth = 0;

			Action<TrieNodeBase> fn = null;
			fn = (TrieNodeBase cur) =>
			{
				if (depth >= rgch.Length)
				{
					Char[] tmp = new Char[rgch.Length * 2];
					Buffer.BlockCopy(rgch, 0, tmp, 0, rgch.Length * sizeof(Char));
					rgch = tmp;
				}
				foreach (var kvp in cur.CharNodePairs())
				{
					rgch[depth] = kvp.Key;
					TrieNodeBase n = kvp.Value;
					if (n.Nodes != null)
					{
						depth++;
						fn(n);
						depth--;
					}
					else if (n.Value == null)		// leaf nodes should always have a value
						throw new Exception();

					if (n.Value != null)
						callback(new String(rgch, 0, depth + 1), n);
				}
			};

			fn(_root);
		}


		/// <summary>
		/// note: only returns nodes with non-null values
		/// </summary>
		public void EnumerateLeafPaths(Action<String, IEnumerable<TrieNodeBase>> callback)
		{
			Stack<TrieNodeBase> stk = new Stack<TrieNodeBase>();
			Char[] rgch = new Char[100];

			Action<TrieNodeBase> fn = null;
			fn = (TrieNodeBase cur) =>
			{
				if (stk.Count >= rgch.Length)
				{
					Char[] tmp = new Char[rgch.Length * 2];
					Buffer.BlockCopy(rgch, 0, tmp, 0, rgch.Length * sizeof(Char));
					rgch = tmp;
				}
				foreach (var kvp in cur.CharNodePairs())
				{
					rgch[stk.Count] = kvp.Key;
					TrieNodeBase n = kvp.Value;
					stk.Push(n);
					if (n.Nodes != null)
						fn(n);
					else
					{
						if (n.Value == null)		// leaf nodes should always have a value
							throw new Exception();
						callback(new String(rgch, 0, stk.Count), stk);
					}
					stk.Pop();
				}
			};

			fn(_root);
		}

		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		///
		/// Convert a trie with one value type to another
		///
		///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
		public Trie<TNew> ToTrie<TNew>(Func<TValue, TNew> value_converter)
		{
			Trie<TNew> t = new Trie<TNew>();
			DepthFirstTraverse((s, n) =>
			{
				t.Add(s, value_converter(n.Value));
			});
			return t;
		}
	};

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///
	///
	///
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	public static class TrieExtension
	{
		public static Trie<TValue> ToTrie<TValue>(this IEnumerable<String> src, Func<String, int, TValue> selector)
		{
			Trie<TValue> t = new Trie<TValue>();
			int idx = 0;
			foreach (String s in src)
				t.Add(s, selector(s, idx++));
			return t;
		}

		public static Trie<TValue> ToTrie<TValue>(this Dictionary<String, TValue> src)
		{
			Trie<TValue> t = new Trie<TValue>();
			foreach (var kvp in src)
				t.Add(kvp.Key, kvp.Value);
			return t;
		}

		public static IEnumerable<TValue> AllSubstringValues<TValue>(this String s, Trie<TValue> trie)
		{
			return trie.AllSubstringValues(s);
		}

		public static void AddToValueHashset<TKey, TValue>(this Dictionary<TKey, HashSet<TValue>> d, TKey k, TValue v)
		{
			HashSet<TValue> hs;
			if (d.TryGetValue(k, out hs))
				hs.Add(v);
			else
				d.Add(k, new HashSet<TValue> { v });
		}
	};
}