fis-gtm/sr_port/is_canonic_name.c

360 lines
9.2 KiB
C

/****************************************************************
* *
* Copyright 2001, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_ctype.h"
#include "is_canonic_name.h"
#ifdef DEBUG
#include "subscript.h"
#endif
#ifdef UNICODE_SUPPORTED
#include "gtm_utf8.h"
GBLREF boolean_t badchar_inhibit;
error_def(ERR_BADCHAR);
#endif
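/*
* -----------------------------------------------
* is_canonic_name()
* Validate that a string is a canonic variable name (an optional ^ and environment, a name and an
* optional parenthesized subscript list) and locate one component of it.
*
* Arguments:
* src - Pointer to the source name string mval
* subscripts - In: sequence number of the component to locate (as supplied by op_fnqsubscript);
* out: count of subscripts found in the name (as used by op_fnqlength)
* start_off - Out: offset of the start of the component requested by op_fnqsubscript
* stop_off - Out: offset of the end of the component requested by op_fnqsubscript
* Return:
* boolean_t - TRUE indicates good name; FALSE indicates defective
* (the three output arguments are only written when TRUE is returned)
* -----------------------------------------------
*/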
boolean_t is_canonic_name(mval *src, int *subscripts, int *start_off, int *stop_off)
{	/* subscripts is overloaded - out to op_fnqlength, which doesn't use the last 2 arguments & in from op_fnqsubscript */
	/* Scans src one character at a time with a small state machine. While validating, it records the byte
	 * offsets [start, stop) of the component whose sequence number was passed in *subscripts
	 * (-1 matches the environment, 0 the name, N > 0 the Nth subscript).
	 * On success *subscripts receives the subscript count and *start_off / *stop_off the offsets; on
	 * failure (FALSE) none of the output arguments are written.
	 */
	char		term;		/* character that closes the current list: ')' for subscripts, '|' or ']' for environment */
	int		envpart;	/* number of environment components seen so far (forced to 1 once in subscripts) */
	boolean_t	instring;	/* TRUE while inside the quotes of a string literal */
	int		isrc;		/* current offset into src->str.addr */
	boolean_t	keep_quotes;	/* requested component involves $[Z]CHAR(), so its quotes are part of the result */
	char		letter;		/* character currently being examined */
	int		point;		/* count of decimal points seen in the current number */
	char		previous;	/* previous significant character (number scanning and $[Z]CHAR parsing) */
	int		seq;		/* sequence number of the requested component (input value of *subscripts) */
	int		start;		/* offset of the start of the requested component */
	int		state;		/* current state - see table below */
	int		stop;		/* offset of the end of the requested component; 0 means "not yet found" */
	int		subs_count;	/* -1 until the name's '(' is seen, then the number of subscripts started */
	int		utf8_len;	/* number of following bytes in a multi-byte character */
	/* state:
	 * 0 before start of name
	 * 1 found ^ allow environment
	 * 2 dispatch for starting a component
	 * 3 in string
	 * 4 in number
	 * 5 expect first letter of name
	 * 6 expect next letter of name
	 * 7 in $CHAR()
	 * 8 at end of processing
	 */
	MV_FORCE_STR(src);
	seq = *subscripts;
	keep_quotes = FALSE;
	start = stop = 0;
	state = 0;
	subs_count = -1;
	for (isrc = 0; isrc < src->str.len; )
	{	/* isrc is advanced at the bottom of the loop; several states also consume extra characters themselves */
		letter = src->str.addr[isrc];
		switch (state)
		{
			case 0: /* start of name */
				if ('^' == letter) /* before start of name */
				{
					state = 1; /* check for environment */
					break;
				}
				if (('%' == letter) || ISALPHA_ASCII(letter))
				{
					if (0 == seq)
						start = isrc;
					state = 6; /* rest of name */
					break;
				}
				return FALSE;
			case 1: /* global name */
				if (('%' == letter) || ISALPHA_ASCII(letter)) /* found ^ allow environment */
				{	/* no environment - the name starts right after the ^ */
					if (0 == seq)
						start = isrc;
					state = 6; /* rest of name */
					break;
				}
				if (('|' == letter) || ('[' == letter))
				{	/* an environment opened by '[' closes with ']'; one opened by '|' closes with '|' */
					term = (letter == '[') ? ']' : letter;
					envpart = 0;
					if (subs_count == seq)	/* subs_count is still -1 here, so this matches a request for -1 */
						start = isrc + 1;
					state = 2; /* process environment */
					break;
				}
				return FALSE;
			case 2: /* dispatch for starting a component */
				point = 0;
				instring = FALSE;
				if (envpart > 1)
					return FALSE; /* too many environment components */
				if (')' == term)
					subs_count++; /* new subscript */
				else
					envpart++; /* next environment component */
				if ((subs_count == seq) && (0 == stop))
					start = isrc;
				if ('"' == letter)
				{
					if ((subs_count == seq) && (1 == envpart))
						start++;	/* step past the opening quote */
					instring = TRUE;
					state = 3; /* string */
					break;
				}
				if ('$' == letter)
				{
					state = 7; /* $[z]char() */
					break;
				}
				if ('0' == letter) /* Canonic number cannot start with 0 unless is single char */
				{	/* look at the character after the 0: it must be a separator or the terminator */
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE; /* Cannot end with "0" */
					if (term == letter)
						state = (')' == term) ? 8 : 5; /* end or name */
					else if (',' != letter)
						return FALSE; /* Not a single char number */
					/* on ',' the state stays 2 so the next character starts a new component */
					if ((subs_count == seq) && (0 == stop))
						stop = isrc;
					break;
				}
				if (('-' == letter) || ('.' == letter) || ISDIGIT_ASCII(letter))
				{
					if ('.' == letter)
						point++;
					previous = letter;
					state = 4; /* numeric */
					break;
				}
				return FALSE;
			case 3: /* [quoted] string */
				if ('"' == letter) /* in string */
				{	/* a quote toggles instring; a second adjacent quote (a doubled quote) toggles it back on */
					instring = !instring;
					if (instring)
						break;
					if (isrc + 1 >= src->str.len)
						return FALSE;	/* a string cannot be the last thing in the name */
					if ('_' != src->str.addr[isrc + 1])
						break;		/* next pass sees a doubled quote, a separator or the terminator */
					/* "..."_ must be followed by $[Z]CHAR() */
					isrc++;
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
					if ('$' != letter)
						return FALSE;
					state = 7; /* $[z]char() */
					break;
				}
				if (!instring)
				{	/* the string has been closed - only a separator or the terminator may follow */
					if (',' == letter)
						state = 2; /* on to next */
					else if (term == letter)
						state = (')' == term) ? 8 : 5; /* end or name */
					else
						return FALSE;
					if ((subs_count == seq) && (0 == stop))
						/* Not returning 2nd env part - maybe problem */
						stop = isrc - (keep_quotes ? 0 : 1);
				}
				break;
			case 4: /* numeric */
				if (ISDIGIT_ASCII(letter)) /* in number */
				{
					if (('-' == previous) && ('0' == letter))
						return FALSE;	/* no leading zero after a minus sign */
					previous = letter;
					break;
				}
				if ('.' == letter)
				{
					if ((++point > 1))
						return FALSE;	/* at most one decimal point */
					previous = letter;
					break;
				}
				if (point && ('0' == previous))
					return FALSE;		/* no trailing zero after a decimal point */
				if (',' == letter)
					state = 2; /* next */
				else if (term == letter)
					state = (')' == term) ? 8 : 5; /* end or name */
				else
					return FALSE;
				if ((subs_count == seq) && (0 == stop))
					stop = isrc;
				previous = letter;
				break;
			case 5: /* expect first letter of name */
				if (('%' == letter) || ISALPHA_ASCII(letter))
				{
					if (0 == seq)
						start = isrc;
					state = 6; /* rest of name */
					break;
				}
				return FALSE;
			case 6: /* expect next letter of name */
				if ('(' == letter)
				{
					term = ')';
					envpart = 1;
					subs_count = 0;
					state = 2; /* done with name */
					if (0 == seq)
						stop = isrc;
				} else if (!ISALNUM_ASCII(letter))
					return FALSE;
				break;
			case 7: /* $[Z]CHAR() */
				/* Entered on the character following '$'. Accepts, in either case, C, CH, CHAR, ZCH and ZCHAR
				 * followed by '('. previous is normalized to 'Z' to remember that the Z form is being parsed.
				 */
				previous = letter; /* in $CHAR() - must be ASCII */
				if (('Z' == letter) || ('z' == letter))
				{
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
					if ('z' == previous)
						previous = 'Z';
				}
				if (!(('C' == letter) || ('c' == letter)))
					return FALSE;
				if (++isrc < src->str.len)
					letter = src->str.addr[isrc];
				else
					return FALSE;
				if (('H' == letter) || ('h' == letter))
				{
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
					/* after CH comes A (for CHAR); '(' directly after CH is only valid for the Z form */
					if (!(('A' == letter) || ('a' == letter) || (('(' == letter) && ('Z' == previous))))
						return FALSE;
				} else if ('Z' == previous)
					return FALSE;	/* the Z form needs at least ZCH */
				if ('(' != letter)
				{	/* letter is the A of CHAR - an R must follow, in either case like the other letters */
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
					/* Both alternatives must be inside the negation; without the extra parentheses the
					 * test rejected a lower-case 'r' and so refused $char( / $zchar(.
					 */
					if (!(('R' == letter) || ('r' == letter)))
						return FALSE;
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
				}
				if ('(' != letter)
					return FALSE;
				if (subs_count == seq)
					keep_quotes = TRUE;
				/* argument list: comma separated runs of digits closed by ')' */
				for (++isrc ;isrc < src->str.len; isrc++)
				{
					letter = src->str.addr[isrc];
					if (ISDIGIT_ASCII(letter))
						continue;
					if (!((',' == letter) || (')' == letter)))
						return FALSE;
					previous = letter;
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;	/* nothing follows the ',' or ')' */
					if (')' == previous)
						break;		/* letter is now the character after the closing paren */
					if (!ISDIGIT_ASCII(letter))
						return FALSE;	/* a digit must follow a comma */
				}
				if (isrc > src->str.len)
					return FALSE;
				if ('_' == letter)
				{	/* concatenation: another $[Z]CHAR() or a string literal must follow */
					if (++isrc < src->str.len)
						letter = src->str.addr[isrc];
					else
						return FALSE;
					if ('$' == letter)
						break;		/* stay in state 7 for the next $[Z]CHAR() */
					if ('"' != letter)
						return FALSE;
					instring = TRUE;
					state = 3; /* back to string */
					break;
				}
				if (',' == letter)
					state = 2;
				else if (term == letter)
					state = (')' == term) ? 8 : 5; /* end or name */
				else
					return FALSE;
				if ((subs_count == seq) && (0 == stop))
					stop = isrc - (keep_quotes ? 0 : 1); /* Not returning 2nd env part - maybe problem */
				break;
			case 8: /* characters remain after the ")" that closed the subscript list */
				return FALSE;
				break;
		}
#		ifdef UNICODE_SUPPORTED
		if (!gtm_utf8_mode || (0 == (letter & 0x80)))
			isrc++;
		else if (0 < (utf8_len = UTF8_MBFOLLOW(&src->str.addr[isrc++])))
		{	/* multi-byte increment */
			assert(4 > utf8_len);
			/* NOTE(review): this branch is only entered when utf8_len is positive, so the check below can
			 * never fire and ERR_BADCHAR is never raised from here - confirm whether that is intended.
			 */
			if (0 > utf8_len)
				rts_error(VARLSTCNT(6) ERR_BADCHAR, 4, 1, &src->str.addr[isrc - 1], LEN_AND_LIT(UTF8_NAME));
			isrc += utf8_len;
		}
#		endif
		NON_UNICODE_ONLY(isrc++);
	}
	/* a good name ends either inside the name itself (unsubscripted) or right after the closing paren */
	if ((8 != state) && (6 != state))
		return FALSE;
	if ((0 <= seq) && (0 == stop))
		stop = src->str.len - (8 == state ? 1 : 0);	/* component runs to the end, excluding a closing paren */
	if (keep_quotes && ('"' == src->str.addr[start - 1]))
		start--;					/* put back the opening quote skipped in state 2 */
	assert((0 < subs_count) || ((6 == state) && (-1 == subs_count)));
	if (6 == state)
		subs_count = 0;					/* unsubscripted name */
	assert((('^' == src->str.addr[0]) ? MAX_GVSUBSCRIPTS : MAX_LVSUBSCRIPTS) > subs_count);
	assert((0 < isrc) && (isrc == src->str.len));
	assert(stop <= isrc);
	assert((0 <= start) && (start <= stop));
	*subscripts = subs_count;
	*start_off = start;
	*stop_off = stop;
	return TRUE;
}