/****************************************************************
 *								*
 *	Copyright 2001, 2011 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

#include "mdef.h"

#include "gtm_string.h"

#include "compiler.h"
#include "toktyp.h"
#include "stringpool.h"
#include "gtm_caseconv.h"
#include "advancewindow.h"
#include "show_source_line.h"

#ifdef UNICODE_SUPPORTED
#include "gtm_utf8.h"
#include "gtm_icu_api.h"	/* U_ISPRINT() needs this header */
#endif

/* Scanner state defined elsewhere and used by the routines in this file:
 *   source_buffer	base address against which source columns are computed
 *   source_column	set on every advancewindow() call to the 1-based offset of lexical_ptr within source_buffer
 *   lexical_ptr	current scan position; advanced as each look-ahead token is consumed
 *   stringpool		string literal text is assembled at stringpool.free
 *   gtm_utf8_mode	when TRUE (and UNICODE_SUPPORTED is defined) string literals get UTF-8 specific checks
 *   run_time		when FALSE, LITNONGRAPH is reported for non-graphic literal characters and the UTF-8
 *			character length of string literals is precomputed
 */
GBLREF	unsigned char	*source_buffer;
GBLREF	short int	source_column;
GBLREF	char		*lexical_ptr;
GBLREF	spdesc		stringpool;
GBLREF	boolean_t	gtm_utf8_mode;
GBLREF	boolean_t	run_time;

/* Table indexed by source byte giving that byte's token type (TK_xxx) */
LITREF	char		ctypetab[NUM_CHARS];

/* Errors this file can report: non-graphic character in a string literal; numeric literal overflow */
error_def(ERR_LITNONGRAPH);
error_def(ERR_NUMOFLOW);

/* Negated-operator lookup used when an apostrophe is scanned. The table is indexed by (character - 32) and covers
 * characters 32 through 95. A non-zero entry is the token produced when that character immediately follows the
 * apostrophe; a zero entry means the apostrophe is returned as a token on its own.
 */
static readonly unsigned char apos_ok[] =
{
	/* 32-39: only '!' (33) and '&' (38) combine with an apostrophe */
	0, TK_NEXCLAIMATION, 0, 0, 0, 0, TK_NAMPERSAND, 0,
	/* 40-47 */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 48-55 */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 56-63: '<' (60), '=' (61), '>' (62) and '?' (63) combine with an apostrophe */
	0, 0, 0, 0, TK_NLESS, TK_NEQUAL, TK_NGREATER, TK_NQUESTION,
	/* 64-71 */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 72-79 */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 80-87 */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 88-95: '[' (91) and ']' (93) combine with an apostrophe */
	0, 0, 0, TK_NLBRACKET, 0, TK_NRBRACKET, 0, 0
};

void advancewindow(void)
{
	unsigned char	*cp1, *cp2, *cp3, x;
	char		*tmp, source_line_buff[MAX_SRCLINE + SIZEOF(ARROW)];
	int		y, charlen;
#	ifdef UNICODE_SUPPORTED
	uint4		ch;
	unsigned char	*cptr;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	TREF(last_source_column) = source_column;
	source_column = (unsigned char *)lexical_ptr - source_buffer + 1;
	TREF(window_token) = TREF(director_token);
	TREF(window_mval) = TREF(director_mval);
	(TREF(director_mval)).mvtype = 0; /* keeps mval from being GC'd since it is not useful until re-used */
	tmp = (TREF(window_ident)).addr;  /* More efficient to swap pointers between window_ident.addr & director_ident.addr */
	TREF(window_ident) = TREF(director_ident);	/* than to copy text from director_ident to window_ident */
	(TREF(director_ident)).addr = tmp;
	x = *lexical_ptr;
	switch (y = ctypetab[x])
	{
	case TK_EOL:
		TREF(director_token) = TK_EOL;
		return;		/* if next character is terminator, avoid incrementing past it */
	case TK_QUOTE:
		ENSURE_STP_FREE_SPACE(MAX_SRCLINE);
		cp1 = (unsigned char *)lexical_ptr + 1;
		cp2 = cp3 = stringpool.free;
		for (;;)
		{
#			ifdef UNICODE_SUPPORTED
			if (gtm_utf8_mode)
				cptr = (unsigned char *)UTF8_MBTOWC((sm_uc_ptr_t)cp1, source_buffer + MAX_SRCLINE, ch);
#			endif
			x = *cp1++;
			if ((SP > x) UNICODE_ONLY(|| (gtm_utf8_mode && !(U_ISPRINT(ch)))))
			{
				TREF(last_source_column) = cp1 - source_buffer;
				if ('\0' == x)
				{
					TREF(director_token) = TREF(window_token) = TK_ERROR;
					return;
				}
				if (!run_time)
				{
					show_source_line(source_line_buff, SIZEOF(source_line_buff), TRUE);
					dec_err(VARLSTCNT(1) ERR_LITNONGRAPH);
				}
			}
			if ('\"' == x)
			{
				UNICODE_ONLY(assert(!gtm_utf8_mode || (cp1 == cptr)));
				if ('\"' == *cp1)
					cp1++;
				else
					break;
			}
			*cp2++ = x;
#			ifdef UNICODE_SUPPORTED
			if (gtm_utf8_mode && (cptr > cp1))
			{
				assert(4 > (cptr - cp1));
				for (; cptr > cp1;)
					*cp2++ = *cp1++;
			}
#			endif
			assert(cp2 <= stringpool.top);
		}
		lexical_ptr = (char *)cp1;
		TREF(director_token) = TK_STRLIT;
		(TREF(director_mval)).mvtype = MV_STR;
		(TREF(director_mval)).str.addr = (char *)cp3;
		(TREF(director_mval)).str.len = INTCAST(cp2 - cp3);
		stringpool.free = cp2;
		s2n(&(TREF(director_mval)));
#		ifdef UNICODE_SUPPORTED
		if (gtm_utf8_mode && !run_time)
		{	/* UTF8 mode and not compiling an indirect gets an optimization to set the
			 * (true) length of the string into the mval
			 */
			charlen = utf8_len_stx(&(TREF(director_mval)).str);
			if (0 > charlen)	/* got a BADCHAR error */
				TREF(director_token) = TK_ERROR;
			else
			{
				assert(charlen == (TREF(director_mval)).str.char_len);
				(TREF(director_mval)).mvtype |= MV_UTF_LEN;
			}
		}
#		endif
		return;
	case TK_LOWER:
	case TK_PERCENT:
	case TK_UPPER:
		cp2 = (unsigned char *)((TREF(director_ident)).addr);
		cp3 = cp2 + MAX_MIDENT_LEN;
		for (;;)
		{
			if (cp2 < cp3)
				*cp2++ = x;
			y = ctypetab[x = *++lexical_ptr];
			if ((TK_UPPER != y) && (TK_DIGIT != y) && (TK_LOWER != y))
				break;
		}
		(TREF(director_ident)).len = INTCAST(cp2 - (unsigned char*)(TREF(director_ident)).addr);
		TREF(director_token) = TK_IDENT;
		return;
	case TK_PERIOD:
		if (ctypetab[x = *(lexical_ptr + 1)] != TK_DIGIT)
			break;
	case TK_DIGIT:
		(TREF(director_mval)).str.addr = lexical_ptr;
		(TREF(director_mval)).str.len = MAX_SRCLINE;
		(TREF(director_mval)).mvtype = MV_STR;
		lexical_ptr = (char *)s2n(&(TREF(director_mval)));
		if (!((TREF(director_mval)).mvtype &= MV_NUM_MASK))
		{
			stx_error(ERR_NUMOFLOW);
			TREF(director_token) = TK_ERROR;
			return;
		}
		if (TREF(s2n_intlit))
		{
			TREF(director_token) = TK_NUMLIT ;
			n2s(&(TREF(director_mval)));
		} else
		{
			TREF(director_token) = TK_INTLIT ;
			(TREF(director_mval)).str.len = INTCAST(lexical_ptr - (TREF(director_mval)).str.addr);
			ENSURE_STP_FREE_SPACE((TREF(director_mval)).str.len);
			memcpy(stringpool.free, (TREF(director_mval)).str.addr, (TREF(director_mval)).str.len);
			assert (stringpool.free <= stringpool.top) ;
		}
		return;
	case TK_APOSTROPHE:
		if (( x = *++lexical_ptr) >= 32)
		{
			x -= 32;
			if (x < SIZEOF(apos_ok) / SIZEOF(unsigned char))
			{
				if (y = apos_ok[x])
				{
					if (DEL < (x = *++lexical_ptr))
					{
						TREF(director_token) = TK_ERROR;
						return;
					}
					if (TK_RBRACKET == ctypetab[x])
					{
						lexical_ptr++;
						y = TK_NSORTS_AFTER;
					}
					TREF(director_token) = y;
					return;
				}
			}
		}
		TREF(director_token) = TK_APOSTROPHE;
		return;
	case TK_SEMICOLON:
		while (*++lexical_ptr)
			;
		assert(TK_EOL == ctypetab[*lexical_ptr]);
		TREF(director_token) = TK_EOL;
		return;		/* if next character is terminator, avoid incrementing past it */
	case TK_ASTERISK:
		if (DEL < (x = *(lexical_ptr + 1)))
		{
			TREF(director_token) = TK_ERROR;
			return;
		}
		if (TK_ASTERISK == ctypetab[x])
		{
			lexical_ptr++;
			y = TK_EXPONENT;
		}
		break;
	case TK_RBRACKET:
		if ((x = *(lexical_ptr + 1)) > DEL)
		{
			TREF(director_token) = TK_ERROR;
			return;
		}
		if (TK_RBRACKET == ctypetab[x])
		{
			lexical_ptr++;
			y = TK_SORTS_AFTER;
		}
		break;
	default:
		;
	}
	lexical_ptr++;
	TREF(director_token) = y;
	return;
}

#ifdef GTM_TRIGGER
/* The M standard does not allow the '#' character to appear inside mnames, but in specific places we want to allow it
 * so that triggers, which have an embedded '#' character in their routine names, can be debugged and printed. The places
 * where this is allowed follow.
 *
 *   1. $TEXT()
 *   2. ZBREAK
 *   3. ZPRINT
 *
 * All other uses still prohibit '#' from being in an MNAME. Routines that need to allow # in a name can call this routine to
 * recombine the existing token and the look-ahead (director) token such that '#' is considered part of an mident.
 *
 * On entry the window token must be TK_IDENT and the director token TK_HASH (both are asserted). The combined name is
 * assembled in a local buffer, limited to MAX_MIDENT_LEN bytes, stored as the director identifier and then moved into
 * the window by calling advancewindow(). Takes no arguments and returns nothing.
 */
void advwindw_hash_in_mname_allowed(void)
{
	unsigned char	*cp2, *cp3, x;
	unsigned char	ident_buffer[SIZEOF(mident_fixed)];
	int		ch;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(TK_IDENT == TREF(window_token));
	assert(TK_HASH == TREF(director_token));
	assert(MAX_MIDENT_LEN >= (TREF(window_ident)).len);
	/* First copy the existing token we want to expand into our safe-haven */
	memcpy(ident_buffer, (TREF(window_ident)).addr, (TREF(window_ident)).len);
	/* Now parse further until we run out of [m]ident */
	cp2 = ident_buffer + (TREF(window_ident)).len;
	cp3 = ident_buffer + MAX_MIDENT_LEN;
	/* We are only called if director token is '#' so put that char in buffer now, unless the name is already at
	 * its maximum length, in which case the '#' is dropped just like the over-length characters in the loop below.
	 */
	if (cp2 < cp3)
		*cp2++ = '#';
	/* Start processing with the token following the '#' */
	for (x = *lexical_ptr, ch = ctypetab[x];
	     ((TK_UPPER == ch) || (TK_DIGIT == ch) || (TK_LOWER == ch) || (TK_HASH == ch));
	     x = *++lexical_ptr, ch = ctypetab[x])
	{
		if (cp2 < cp3)
			*cp2++ = x;
	}
	(TREF(director_ident)).len = INTCAST(cp2 - ident_buffer);
	TREF(director_token) = TK_IDENT;
	memcpy((TREF(director_ident)).addr, ident_buffer, (TREF(director_ident)).len);
	advancewindow();	/* Makes the homogenized token the current token (again) and prereads next token */
}
#endif