/***************************************************************************
  
  eval_analyze.c
  
  (c) Benoît Minisini <benoit.minisini@gambas-basic.org>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  MA 02110-1301, USA.
  
***************************************************************************/

#define __EVAL_ANALYZE_C

#include "gambas.h"
#include "gb_common.h"
#include "gb_array.h"
#include "eval_analyze.h"

#include "c_system.h"
/*#define DEBUG*/

/* Byte length of a UTF-8 sequence, indexed by the value of its first byte.
   Each row below covers 32 byte values:
     0x00-0xBF -> 1 (this includes 0x80-0xBF, so a stray continuation byte
                     is stepped over one byte at a time)
     0xC0-0xDF -> 2
     0xE0-0xEF -> 3, 0xF0-0xF7 -> 4, 0xF8-0xFB -> 5, 0xFC-0xFD -> 6
     0xFE-0xFF -> 1
   Used by the NEXT_UTF8_CHAR() macro. */
static const uchar _utf8_char_length[256] =
{
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

/* Staging buffer for the analyzed text: add_result() / add_result_char()
   append to it and flush_result() moves its contents to result->str. */
static char _analyze_buffer[256];
/* Number of bytes currently held in _analyze_buffer. */
static int _analyze_buffer_pos;

/* Colour entries are first stored in the fixed _colors array. When it is
   full, add_data() copies it to the end of the dynamic _color_buffer array
   and starts filling _colors again from index 0. */
#define COLOR_BUFFER_SIZE 256
static EVAL_COLOR _colors[COLOR_BUFFER_SIZE];
/* Number of entries currently used in _colors. */
static int _colors_len = 0;
/* Overflow array; NULL until _colors has been filled once. It is released
   by EVAL_analyze_exit(). */
static EVAL_COLOR *_color_buffer = NULL;

/* Advances the char pointer _p by the length found in _utf8_char_length for
   the byte it points at. _p is evaluated twice and is modified in place. */
#define NEXT_UTF8_CHAR(_p) (_p += _utf8_char_length[(uchar)*(_p)])

/* Scans backwards from 'pattern' (which is not examined itself) and returns
   the n-th pattern that is not a space. Returns NULL_PATTERN when the start
   of EVAL->pattern is reached before n such patterns were found. */
static PATTERN get_previous_pattern(PATTERN *pattern, int n)
{
	PATTERN *first = EVAL->pattern;
	PATTERN *cur = pattern;

	while (cur > first)
	{
		cur--;

		/* Spaces are not counted */
		if (PATTERN_type(*cur) == RT_SPACE)
			continue;

		if (--n <= 0)
			return *cur;
	}

	return NULL_PATTERN;
}

/* Returns the closest pattern located before 'pattern' that is not a space,
   or NULL_PATTERN when there is none.

   The scan is delegated to get_previous_pattern() so that it stops at the
   beginning of EVAL->pattern. An unbounded backward loop would read before
   the start of the array when called on the first pattern of the line, or
   when only spaces precede it. */
static PATTERN get_last_pattern(PATTERN *pattern)
{
	return get_previous_pattern(pattern, 1);
}

/* Returns the first pattern located after 'pattern' that is not a space.
   There is no upper bound check: the scan only stops on a non-space
   pattern, so the caller must not pass the last pattern of the array. */
static PATTERN get_next_pattern(PATTERN *pattern)
{
	do
		pattern++;
	while (PATTERN_type(*pattern) == RT_SPACE);

	return *pattern;
}

/* Returns the type used to colour and space the pattern pointed to by
   'pattern'. This is PATTERN_type(), except that some reserved words are
   reported as RT_OPERATOR or RT_DATATYPE, and an identifier that follows
   ENUM may be reported as RT_DATATYPE, depending on neighbouring patterns. */
static int get_type(PATTERN *pattern)
{
	int kind = PATTERN_type(*pattern);
	int res = PATTERN_index(*pattern);
	PATTERN before, after;

	if (kind == RT_IDENTIFIER)
	{
		/* Only an identifier placed right after ENUM can change type */
		if (!PATTERN_is(get_last_pattern(pattern), RS_ENUM))
			return kind;

		before = get_previous_pattern(pattern, 2);
		after = get_next_pattern(pattern);

		if (PATTERN_is_null(before) || PATTERN_is(before, RS_PUBLIC) || PATTERN_is(before, RS_PRIVATE))
		{
			if (PATTERN_is(after, RS_IS) || PATTERN_is_newline(after) || PATTERN_is_comment(after))
				return RT_DATATYPE;
		}
		else if (PATTERN_is(before, RS_AS))
			return RT_DATATYPE;

		return kind;
	}

	if (kind != RT_RESERVED)
		return kind;

	/* IS is an operator unless ENUM is found two patterns before it */
	if (res == RS_IS)
		return PATTERN_is(get_previous_pattern(pattern, 2), RS_ENUM) ? kind : RT_OPERATOR;

	/* ENUM placed right after AS is a datatype */
	if (res == RS_ENUM)
		return PATTERN_is(get_last_pattern(pattern), RS_AS) ? RT_DATATYPE : kind;

	/* Reserved words from RS_COLON onwards are operators, except AND / OR
	   immediately followed by IF */
	if (res >= RS_COLON)
	{
		if ((res == RS_AND || res == RS_OR) && PATTERN_is(get_next_pattern(pattern), RS_IF))
			return kind;

		return RT_OPERATOR;
	}

	if (RES_is_type(res))
		return RT_DATATYPE;

	/* WITH placed right after BEGINS or ENDS is an operator */
	if (res == RS_WITH && pattern > EVAL->pattern)
	{
		res = PATTERN_index(get_last_pattern(pattern));
		if (res == RS_BEGINS || res == RS_ENDS)
			return RT_OPERATOR;
	}

	return kind;
}

/* Returns TRUE if 'pattern' is one of the reserved words ME, SUPER, LAST,
   TRUE, FALSE, PINF, MINF, ERROR or NULL. */
static bool is_me_last_error_kind(PATTERN pattern)
{
	static const int kinds[] =
	{
		RS_ME, RS_SUPER, RS_LAST, RS_TRUE, RS_FALSE,
		RS_PINF, RS_MINF, RS_ERROR, RS_NULL
	};
	int i;

	for (i = 0; i < (int)(sizeof(kinds) / sizeof(kinds[0])); i++)
	{
		if (PATTERN_is(pattern, kinds[i]))
			return TRUE;
	}

	return FALSE;
}

/* Returns TRUE if 'pattern' is the reserved word OPTIONAL or BYREF. */
static bool is_optional_kind(PATTERN pattern)
{
	if (PATTERN_is(pattern, RS_OPTIONAL))
		return TRUE;

	return PATTERN_is(pattern, RS_BYREF);
}

/* Retrieves the text associated with a pattern.

   pattern: the pattern to look up.
   symbol:  receives a pointer to the text. It is not guaranteed to be
            null-terminated, so always use it together with *len.
   len:     receives the length of the text in bytes.

   For reserved words (when EVAL->rewrite is not set) and for integers the
   text is built in a static scratch buffer, so the returned pointer is only
   valid until the next call. Pattern types that are not handled here give
   *symbol = NULL and *len = 0. */
static void get_symbol(PATTERN pattern, const char **symbol, int *len)
{
	static char keyword[32];
	int i;
	SYMBOL *sym;
	int type = PATTERN_type(pattern);
	int index = PATTERN_index(pattern);

	switch(type)
	{
		case RT_RESERVED:
			*symbol = COMP_res_info[index].name;
			*len = strlen(*symbol);
			/* The upper-case copy is only built when it fits in the scratch
			   buffer; a longer name is returned as spelled in the table
			   instead of overflowing 'keyword'. */
			if (!EVAL->rewrite && *len <= (int)sizeof(keyword))
			{
				/* toupper() requires a value representable as unsigned char */
				for (i = 0; i < *len; i++)
					keyword[i] = toupper((unsigned char)(*symbol)[i]);
				*symbol = keyword;
			}
			return;
			
		case RT_INTEGER:
			*len = snprintf(keyword, sizeof(keyword), "%d", PATTERN_signed_index(pattern));
			*symbol = keyword;
			return;
			
		/* These three kinds are all stored in the symbol table */
		case RT_NUMBER:
		case RT_IDENTIFIER:
		case RT_CLASS:
			sym = TABLE_get_symbol(EVAL->table, index);
			break;
			
		/* These kinds are stored in the string table */
		case RT_STRING:
		case RT_TSTRING:
		case RT_COMMENT:
		case RT_ERROR:
			sym = TABLE_get_symbol(EVAL->string, index);
			break;
			
		case RT_SUBR:
			*symbol = COMP_subr_info[index].name;
			*len = strlen(*symbol);
			return;
			
		default:
			*symbol = NULL;
			*len = 0;
			return;
	}

	*symbol = sym->name;
	*len = sym->len;
}


static void add_data(int state, int len)
{
	EVAL_COLOR *color;

	while (len > EVAL_COLOR_MAX_LEN)
	{
		add_data(state, EVAL_COLOR_MAX_LEN);
		len -= EVAL_COLOR_MAX_LEN;
	}
	
	if (len == 0)
		return;
	
	if (_colors_len >= COLOR_BUFFER_SIZE)
	{
		if (!_color_buffer)
			ARRAY_create_inc(&_color_buffer, COLOR_BUFFER_SIZE);
		
		color = ARRAY_add_many(&_color_buffer, COLOR_BUFFER_SIZE);
		memcpy(color, _colors, sizeof(EVAL_COLOR) * COLOR_BUFFER_SIZE);
		_colors_len = 0;
	}
	
	color = &_colors[_colors_len];
	color->state = state;
	color->len = len;
	color->alternate = FALSE;
	_colors_len++;
}

static void add_data_merge(int state, int len)
{
	if (_colors_len > 0 && _colors[_colors_len - 1].state == state && (_colors[_colors_len - 1].len + len) <= EVAL_COLOR_MAX_LEN)
	  _colors[_colors_len - 1].len += len;
	else
		add_data(state, len);
}

/* Publishes the collected colour entries in result->color / result->len.
   If the overflow array was never used, the result points directly at the
   static _colors array. Otherwise the pending entries are appended to
   _color_buffer and the result points at that array. */
static void flush_colors(EVAL_ANALYZE *result)
{
	EVAL_COLOR *tail;

	if (!_color_buffer)
	{
		result->color = _colors;
		result->len = _colors_len;
		return;
	}

	if (_colors_len)
	{
		tail = ARRAY_add_many(&_color_buffer, _colors_len);
		memcpy(tail, _colors, sizeof(EVAL_COLOR) * _colors_len);
	}

	result->color = _color_buffer;
	result->len = ARRAY_count(_color_buffer);
}

/* Tells whether the analyzed line begins a procedure declaration: after
   skipping spaces and the PRIVATE, PUBLIC, STATIC, FAST and UNSAFE
   keywords, the first remaining pattern must be SUB, PROCEDURE or FUNCTION.
   Returns FALSE if there is no pattern array, or if the end or a newline
   is reached first. */
static int is_proc(void)
{
	PATTERN *cur = EVAL->pattern;
	PATTERN pattern;

	if (!cur)
		return FALSE;

	for (;; cur++)
	{
		pattern = *cur;

		if (PATTERN_is_end(pattern) || PATTERN_is_newline(pattern))
			return FALSE;

		/* Spaces and declaration modifiers are skipped */
		if (PATTERN_is_space(pattern))
			continue;

		if (PATTERN_is(pattern, RS_PRIVATE) || PATTERN_is(pattern, RS_PUBLIC) || PATTERN_is(pattern, RS_STATIC)
				|| PATTERN_is(pattern, RS_FAST) || PATTERN_is(pattern, RS_UNSAFE))
			continue;

		break;
	}

	return (PATTERN_is(pattern, RS_SUB) || PATTERN_is(pattern, RS_PROCEDURE) || PATTERN_is(pattern, RS_FUNCTION));
}

/* Returns the number of leading bytes of 'symbol' whose value lies between
   1 and 32 inclusive (control characters and the space). Returns 'len' if
   every byte is in that range. */
static int get_symbol_indent(const char *symbol, int len)
{
	int pos = 0;

	while (pos < len && symbol[pos] > 0 && symbol[pos] < 33)
		pos++;

	return pos;
}

/* Tells whether the text 'comp' is found in 'symbol' at offset 'from'.
   The test only succeeds when at least one more byte follows the match
   inside the 'len' bytes of 'symbol'. */
static bool symbol_starts_with(const char *symbol, int len, int from, const char *comp)
{
	int clen = strlen(comp);
	int room = len - clen;

	if (from >= room)
		return 0;

	return strncmp(symbol + from, comp, clen) == 0;
}


/* Returns the number of UTF-8 characters held in the first 'l' bytes of
   's', by counting every byte that is not a continuation byte (10xxxxxx). */
static int get_utf8_length(const char *s, int l)
{
	int count = 0;

	while (l-- > 0)
	{
		if ((*s++ & 0xC0) != 0x80)
			count++;
	}

	return count;
}

/* Empties the text staging buffer before a new analysis.
   Declared with an explicit (void) parameter list, like the other
   parameterless functions of this file, so that it is a real prototype. */
static void init_result(void)
{
	_analyze_buffer_pos = 0;
}

/* Appends the contents of the staging buffer to result->str, then empties
   the buffer. Does nothing when the buffer is empty. */
static void flush_result(EVAL_ANALYZE *result)
{
	if (_analyze_buffer_pos <= 0)
		return;

	result->str = GB.AddString(result->str, _analyze_buffer, _analyze_buffer_pos);
	_analyze_buffer_pos = 0;
}

/* Appends 'len' bytes of 'str' to the analyzed text.
   The bytes normally go through the staging buffer. If they do not fit,
   the buffer is flushed first, and text at least as large as the whole
   buffer is then appended to result->str directly. */
static void add_result(EVAL_ANALYZE *result, const char *str, int len)
{
	const size_t capacity = sizeof(_analyze_buffer);

	if ((size_t)(_analyze_buffer_pos + len) > capacity)
	{
		flush_result(result);

		if ((size_t)len >= capacity)
		{
			result->str = GB.AddString(result->str, str, len);
			return;
		}
	}

	memcpy(_analyze_buffer + _analyze_buffer_pos, str, len);
	_analyze_buffer_pos += len;
}

/* Appends the single character 'c' to the analyzed text, flushing the
   staging buffer first when it is full. */
static void add_result_char(EVAL_ANALYZE *result, char c)
{
	if ((size_t)(_analyze_buffer_pos + 1) > sizeof(_analyze_buffer))
		flush_result(result);

	_analyze_buffer[_analyze_buffer_pos] = c;
	_analyze_buffer_pos++;
}

/* Appends 'nspace' space characters to the analyzed text. Nothing is added
   when 'nspace' is zero or negative. */
static void add_result_spaces(EVAL_ANALYZE *result, int nspace)
{
	for (; nspace > 0; nspace--)
		add_result_char(result, ' ');
}

/* Walks the pattern array of the current expression (EVAL->pattern) and
   fills 'result' with:
     - result->str: the text of the line, rebuilt pattern by pattern. When
       EVAL->rewrite is set, the spacing between patterns is normalised
       instead of being copied from the source;
     - result->color / result->len: the list of colour entries (state and
       length) describing that text;
     - result->len_before: the running total of pattern lengths accumulated
       in 'len_before' below.

   The function returns without touching 'result' when the expression is
   empty or has no pattern array. The walk stops on the RT_END pattern, or
   on a web page limit operator when EVAL->limit is set. */
static void analyze(EVAL_ANALYZE *result)
{
	PATTERN *pattern;
	int type, old_type, next_type;
	const char *symbol;
	const char *p;
	bool space_before, space_after;
	int len, i, l;
	bool preprocessor;
	uint last;
	int len_before;

	/* Start from an empty colour list and release the overflow array that a
	   previous analysis may have left behind */
	_colors_len = 0;
	EVAL_analyze_exit();
	
	pattern = EVAL->pattern;
	preprocessor = FALSE;
	len_before = 0;

	if (EVAL->len <= 0)
		return;

	if (!pattern)
		return;

	init_result();
	
	/* NOTE(review): this value of 'type' is overwritten by get_type() at the
	   top of the loop before being read */
	type = EVAL->comment ? RT_COMMENT : RT_END;
	/* old_type is the type recorded for the previous non-space pattern;
	   RT_END means that there was none yet */
	next_type = RT_END;
	old_type = RT_END;
	space_after = FALSE;

	for(;;)
	{
		type = get_type(pattern);
		
		if (type == RT_END)
			break;
		
		if (type == RT_SPACE)
		{
			len = PATTERN_index(*pattern);
			len_before += len;

			/* In rewrite mode, source spaces are only kept while no colour
			   entry is pending in _colors, or just before the end or a
			   comment */
			if (!EVAL->rewrite || _colors_len == 0 || PATTERN_is_end(pattern[1]) || PATTERN_is_comment(pattern[1]))
			{
				add_data(RT_SPACE, len);
				add_result_spaces(result, len);
			}

			goto __NEXT_PATTERN;
		}

		old_type = next_type;
		next_type = type;
		
		get_symbol(*pattern, &symbol, &len);

		/* The space requested after the previous pattern becomes the default
		   for the space before this one */
		space_before = space_after;
		space_after = FALSE;

		//if (in_quote && (type == RT_RESERVED || type == RT_DATATYPE || type == RT_SUBR))
		//	type = RT_IDENTIFIER;

		/* Decide the spacing around the pattern according to its type.
		   space_before is only used when EVAL->rewrite is set (see below). */
		switch(type)
		{
			case RT_RESERVED:
				
				if (is_me_last_error_kind(*pattern))
				{
					if (old_type != RT_OPERATOR)
						space_before = TRUE;
					/* The following pattern will see this one as an identifier */
					next_type = RT_IDENTIFIER;
					if (PATTERN_is(*pattern, RS_ERROR))
					{
						last = get_last_pattern(pattern);
						if (old_type == RT_END || PATTERN_is(last, RS_THEN) || PATTERN_is(last, RS_ELSE))
							space_after = TRUE;
					}
				}
				else if (is_optional_kind(*pattern))
				{
					if (old_type != RT_OPERATOR)
						space_before = TRUE;
				}
				else if (PATTERN_is_preprocessor(*pattern))
				{
					/* From now on, patterns are coloured as RT_PREPROCESSOR,
					   except comments and help comments */
					preprocessor = TRUE;
					space_before = FALSE;
				}
				else
					space_before = TRUE;
				
				break;

			case RT_DATATYPE:
				//state = Datatype;
				/* A datatype name placed right after OPEN is handled as a
				   reserved word */
				if (PATTERN_is(get_last_pattern(pattern), RS_OPEN))
					type = RT_RESERVED;

				if (old_type != RT_OPERATOR)
					space_before = TRUE;
				
				break;

			case RT_IDENTIFIER:
			case RT_CLASS:
				//state = Symbol;
				if (old_type != RT_OPERATOR)
					space_before = TRUE;
				break;

			case RT_INTEGER:
			case RT_NUMBER:
				//state = Number;
				if (old_type != RT_OPERATOR)
					space_before = TRUE;
				break;

			case RT_STRING:
				//state = String;
				if (old_type != RT_OPERATOR)
					space_before = TRUE;
				break;

			case RT_SUBR:
				//state = Subr;
				if (old_type != RT_OPERATOR)
					space_before = TRUE;
				break;

			case RT_COMMENT:
				//state = Commentary;
				space_before = FALSE; //*symbol != ' ';
				i = get_symbol_indent(symbol, len);
				/* A quote found just after the first non-blank character makes
				   the comment a help comment */
				if ((len >= 2) && (i <= (len - 2)) && (symbol[i + 1] == '\''))
					type = RT_HELP;
				else
				{
					/* Skip the quotes, then the blanks, and look for a marker */
					while (i < len && (uchar)symbol[i] == '\'')
						i++;
					
					while (i < len && (uchar)symbol[i] <= ' ')
						i++;
					
					if (i < len)
					{
						if (symbol_starts_with(symbol, len, i, "NOTE:")
								|| symbol_starts_with(symbol, len, i, "TODO:")
								|| symbol_starts_with(symbol, len, i, "FIXME:"))
							type = RT_HELP;
					}
				}
				break;

			case RT_OPERATOR:

				/* NOTE(review): index() is presumably the BSD <strings.h>
				   function that searches a character in a string - confirm.
				   Each test below looks at the first character of the
				   operator. */
				if (index("([)]@", *symbol))
				{
					space_after = FALSE;
				}
				else if (index(":;,", *symbol))
				{
					space_before = FALSE;
					space_after = TRUE;
				}
				else if (index("#{", *symbol))
				{
					if (old_type != RT_OPERATOR)
						space_before = TRUE;
					space_after = FALSE;
					//in_quote = *symbol == '{';
					
					/*if (!preprocessor && *symbol == '#' && old_type == RT_END)
						preprocessor = TRUE;*/
				}
				else if (index("}", *symbol))
				{
					space_before = FALSE;
					space_after = FALSE;
					//in_quote = FALSE;
				}
				else if (index(".!", *symbol)) //symbol[0] == '.' && symbol[1] == 0)
				{
					//space_before = FALSE;
					space_after = FALSE;
				}
				else if (*symbol == '?')
				{
					/* In rewrite mode, '?' is replaced by the Print keyword */
					if (EVAL->rewrite)
					{
						symbol = "Print";
						len = 5;
						type = RT_RESERVED;
						space_after = TRUE;
					}
				}
				else if (*symbol == '-' && len == 1)
				{
					/* No space before a minus that directly follows an opening
					   bracket */
					if (old_type == RT_OPERATOR && (PATTERN_is(get_last_pattern(pattern), RS_LBRA) || PATTERN_is(get_last_pattern(pattern),RS_LSQR)))
						space_before = FALSE;
					else
						space_before = TRUE;
					
					if (old_type == RT_RESERVED || old_type == RT_DATATYPE)
						space_after = FALSE;
					else if (old_type == RT_OPERATOR)
					{
						/* A space follows the minus only when the previous
						   operator is a closing bracket. 'symbol' and 'len' are
						   borrowed for the test, then restored. */
						get_symbol(get_last_pattern(pattern), &symbol, &len);
						if (index(")]}", *symbol))
							space_after = TRUE;
						else
							space_after = FALSE;
						get_symbol(*pattern, &symbol, &len);
					}
					else
						space_after = TRUE;
				}
				else if (PATTERN_is(*pattern, RS_NOT))
				{
					if (old_type == RT_OPERATOR && (PATTERN_is(get_last_pattern(pattern), RS_LBRA) || PATTERN_is(get_last_pattern(pattern),RS_LSQR)))
						space_before = FALSE;
					else
						space_before = TRUE;

					space_after = TRUE;
				}
				else if (PATTERN_is(*pattern, RS_WEBPAGE_LIMIT))
				{
					if (EVAL->limit)
					{
						/* Emit the limit operator and stop the analysis there */
						add_result(result, symbol, len);
						add_data(RT_OPERATOR, len);
						len_before += len;

						/*for(;;)
						{
							pattern++;
							if (get_type(pattern) == RT_END)
								break;

							get_symbol(*pattern, &symbol, &len);
							add_result(result, symbol, len);
							add_data(RT_ERROR, len);
						}*/

						goto __END_PATTERN;
					}
					else
					{
						space_before = FALSE;
						space_after = FALSE;
					}
				}
				else
				{
					/* Any other operator is surrounded by spaces */
					space_before = TRUE;
					space_after = TRUE;
				}

				/* An operator that follows a reserved word always gets a space
				   before it */
				if (old_type == RT_RESERVED)
					space_before = TRUE;

				break;

			case RT_ERROR:
				space_before = TRUE;
				break;
		}

		/* The separating space is only inserted in rewrite mode, and never
		   before the first pattern */
		if (EVAL->rewrite && space_before && old_type != RT_END)
		{
			add_result_char(result, ' ');
			add_data(preprocessor ? RT_PREPROCESSOR : RT_SPACE, 1);
		}

		/* Strings are emitted between double quotes */
		if (type == RT_STRING)
			add_result_char(result, '"');

		if (len)
		{
			/* In rewrite mode, the first character of a class name is
			   upper-cased */
			if (EVAL->rewrite && type == RT_CLASS)
			{
				add_result_char(result, toupper(symbol[0]));
				if (len > 1) add_result(result, &symbol[1], len - 1);
			}
			else
				add_result(result, symbol, len);
			//printf("add: %.*s\n", len, symbol);
			/* From here, 'len' is a number of characters, not of bytes */
			len = get_utf8_length(symbol, len);
		}

		if (type == RT_STRING)
		{
			add_result_char(result, '"');
			len += 2;
		}

		len_before += len;

		if (type == RT_STRING)
		{
			if (!EVAL->rewrite && !EVAL->full)
			{
				/* The whole string, quotes included, is coloured as one entry
				   by the add_data() call at the end of the loop body */
				type = RT_STRING;
			}
			else
			{
				/* Colour the string piece by piece: the opening quote, then
				   plain characters as RT_STRING and backslash sequences as
				   RT_ESCAPE, then the closing quote */
				add_data(RT_STRING, 1);
				len -= 2;
				for (i = 0, p = symbol; i < len; i++)
				{
					if (*p == '\\')
					{
						i++;
						NEXT_UTF8_CHAR(p);
						
						add_data_merge(RT_ESCAPE, 1);
						if (i < len)
						{
							/* "\x" followed by two hexadecimal digits is a
							   four-character escape; otherwise the escape is
							   the backslash and the next character */
							if (*p == 'x' && i < (len - 2) && isxdigit(p[1]) && isxdigit(p[2]))
							{
								l = 3;
								i += 2;
							}
							else
								l = 1;
							add_data_merge(RT_ESCAPE, l);
							
							while (l--)
								NEXT_UTF8_CHAR(p);
						}
					}
					else
					{
						NEXT_UTF8_CHAR(p);
						add_data_merge(RT_STRING, 1);
					}
				}
				add_data_merge(RT_STRING, 1);
				goto __NEXT_PATTERN;
			}
		}
		else if (type == RT_IDENTIFIER)
		{
			/* An identifier followed by a colon is a label: the colon is
			   consumed here and coloured with the identifier as RT_LABEL */
			if (PATTERN_is(get_next_pattern(pattern), RS_COLON))
			{
				for(;;)
				{
					pattern++;
					if (!PATTERN_is_space(*pattern))
						break;
					/* Spaces between the name and the colon are only counted
					   in the colour entry when not rewriting; they are not
					   added to the text */
					if (!EVAL->rewrite)
						add_data_merge(RT_LABEL, PATTERN_index(*pattern));
				}
				add_result_char(result, ':');
				add_data_merge(RT_LABEL, len + 1);
				space_after = TRUE;
				goto __NEXT_PATTERN;
			}
			else if (old_type == RT_RESERVED && (PATTERN_is(get_last_pattern(pattern), RS_GOTO) || PATTERN_is(get_last_pattern(pattern), RS_GOSUB)))
			{
				/* The target of GOTO / GOSUB is coloured as a label too */
				type = RT_LABEL;
			}
		}
		else if (type == RT_RESERVED)
		{
			/* NULL, TRUE, FALSE, PINF and MINF are coloured as constants,
			   except NULL when it directly follows OPEN */
			if (PATTERN_is(*pattern, RS_NULL))
			{
				if (!(old_type == RT_RESERVED && PATTERN_is(get_last_pattern(pattern), RS_OPEN)))
					type = RT_CONSTANT;
			}
			else if (PATTERN_is(*pattern, RS_TRUE)
					|| PATTERN_is(*pattern, RS_FALSE)
					|| PATTERN_is(*pattern, RS_PINF)
					|| PATTERN_is(*pattern, RS_MINF))
			{
				type = RT_CONSTANT;
			}
		}
		
		if (preprocessor && type != RT_COMMENT && type != RT_HELP)
			add_data(RT_PREPROCESSOR, len);
		else
			add_data(type, len);
		//printf("add_data: %.d (%d)\n", type, len);

	__NEXT_PATTERN:
		pattern++;
	}

__END_PATTERN:

	/* Publish what remains in the text buffer and in the colour buffer */
	flush_result(result);
	flush_colors(result);

	result->len_before = len_before;


	//fprintf(stderr, "analyze: %d %s\n", strlen(result->str), result->str);
}


/* Appends a pattern at the end of EVAL->pattern. The expansion already ends
   with a semicolon. */
#define add_pattern(_type, _index) EVAL->pattern[EVAL->pattern_count++] = PATTERN_make((_type), (_index));

/* Terminates the pattern array. The part of the source located after
   READ_source_ptr, if any, is stored in the string table and added as one
   RT_ERROR pattern; an RT_END pattern is then always added. */
static void add_end_pattern(void)
{
	int remaining = EVAL->len - (READ_source_ptr - EVAL->source);

	if (remaining > 0)
	{
		int sym_index = TABLE_add_symbol(EVAL->string, READ_source_ptr, remaining);
		add_pattern(RT_ERROR, sym_index);
	}

	add_pattern(RT_END, 0);
}


/* Analyzes one line of source code.

   src, len: the text to analyze and its length in bytes. Nothing is
             analyzed when len <= 0.
   state:    when equal to RT_COMMENT, EVAL->comment is set before reading.
   result:   cleared, then filled by analyze(); 'proc' receives the result
             of is_proc() and 'state' is RT_COMMENT or RT_END according to
             EVAL->comment after the analysis.
   rewrite, limit, full: copied to the corresponding EVAL flags.

   The text is copied into EVAL->source, followed by two null bytes, and the
   copy is freed before returning. If EVAL_read() raises an error, the
   pattern array is terminated by add_end_pattern() and the analysis is done
   on what was read. */
PUBLIC void EVAL_analyze(const char *src, int len, int state, EVAL_ANALYZE *result, bool rewrite, bool limit, bool full)
{
	CLEAR(result);

	result->len = 0;
	result->str = NULL;

	if (len > 0)
	{
		/* The trace uses the function arguments and a "%.*s" precision, so
		   that it compiles and prints exactly 'len' bytes when DEBUG is
		   defined */
		#ifdef DEBUG
		printf("EVAL: %.*s\n", len, src);
		#endif

		EVAL = &EVAL_read_expr;

		EVAL_clear(EVAL, FALSE);
		
		EVAL->source = GB.NewString(src, len);
		EVAL->source = GB.AddString(EVAL->source, "\0\0", 2);
		EVAL->len = len;
		
		EVAL->analyze = TRUE;
		EVAL->rewrite = rewrite;
		EVAL->full = full;
		EVAL->limit = limit;
		EVAL->comment = state == RT_COMMENT;
		
		EVAL_start(EVAL);

		TRY
		{
			EVAL_read();
		}
		CATCH
		{
			/* Keep what was read, and mark the rest as an error */
			add_end_pattern();
		}
		END_TRY

		analyze(result);
		result->proc = is_proc();
		result->state = EVAL->comment ? RT_COMMENT : RT_END;

		GB.FreeString(&EVAL->source);
	}
	else
	{
		result->proc = FALSE;
	}
}


/* Releases the colour overflow array, if it has been allocated. */
void EVAL_analyze_exit(void)
{
	if (!_color_buffer)
		return;

	ARRAY_delete(&_color_buffer);
}
