fis-gtm/sr_unix/cli_lex.c

694 lines
18 KiB
C

/****************************************************************
* *
* Copyright 2001, 2013 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
/*
* -----------------------------------------------------
* Lexical analyzer routines for command line interpreter
* -----------------------------------------------------
*/
#include "mdef.h"
#include "gtm_ctype.h"
#include <errno.h>
#include "gtm_stdio.h"
#include "gtm_string.h"
#ifdef UNICODE_SUPPORTED
#include "gtm_icu_api.h"
#include "gtm_utf8.h"
#endif
#include "cli.h"
#include "eintr_wrappers.h"
#include "min_max.h"
GBLDEF char cli_token_buf[MAX_LINE + 1]; /* Token buffer */
GBLREF int cmd_cnt;
GBLREF char **cmd_arg;
GBLDEF boolean_t gtm_cli_interpret_string = TRUE;
GBLDEF IN_PARMS *cli_lex_in_ptr;
#ifdef UNICODE_SUPPORTED
GBLREF boolean_t gtm_utf8_mode;
#define CLI_GET_CHAR(PTR, BUFEND, CHAR) (gtm_utf8_mode ? UTF8_MBTOWC(PTR, BUFEND, CHAR) : (CHAR = (wint_t)*(PTR), (PTR) + 1))
#define CLI_PUT_CHAR(PTR, CHAR) (gtm_utf8_mode ? UTF8_WCTOMB(CHAR, PTR) : (*(PTR) = CHAR, (PTR) + 1))
#define CLI_ISSPACE(CHAR) (gtm_utf8_mode ? U_ISSPACE(CHAR) : ISSPACE_ASCII((int)CHAR))
#else
#define CLI_GET_CHAR(PTR, BUFEND, CHAR) (CHAR = (int)*(PTR), (PTR) + 1)
#define CLI_PUT_CHAR(PTR, CHAR) (*(PTR) = CHAR, (PTR) + 1)
#define CLI_ISSPACE(CHAR) ISSPACE_ASCII(CHAR)
#endif
/* Don't use toupper() because, with Turkish unicode settings, toupper('i') does not have well-defined behavior. On some platforms
* it returns back 'i' itself. This is because, in Turkish, the actual uppercase version of 'i' is 'I' with a dot on top, which is
* not an ascii character. Thus cli_strupper would incorrectly convert some qualifiers, resulting in CLIERR errors. For example it
* would convert "-dynamic_literals" to "-DYNAMiC_LiTERALS" or "-warnings" to "-WARNiNGS".
*/
#define CLI_TOUPPER(C) (('a' <= (C) && (C) <= 'z') ? ((C) + ('A' - 'a')) : (C))
static int tok_string_extract(void)
{
int token_len;
boolean_t have_quote, first_quote;
uchar_ptr_t in_sp, out_sp, in_next, last_in_next,
bufend; /* really one past last byte of buffer */
#ifdef UNICODE_SUPPORTED
wint_t ch;
#else
int ch;
#endif
assert(cli_lex_in_ptr);
in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp;
bufend = (uchar_ptr_t)&cli_lex_in_ptr->in_str[0] + cli_lex_in_ptr->buflen;
out_sp = (uchar_ptr_t)cli_token_buf;
token_len = 0;
have_quote = FALSE;
in_next = CLI_GET_CHAR(in_sp, bufend, ch);
for ( ; ;)
{
/* '-' is not a token separator */
while (ch && !CLI_ISSPACE(ch))
{
last_in_next = in_next;
if (ch == '"')
{
if (!have_quote)
{
if (!gtm_cli_interpret_string)
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
token_len++;
}
have_quote = TRUE;
in_next = CLI_GET_CHAR(in_next, bufend, ch);
} else
{
if (!gtm_cli_interpret_string)
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
token_len++;
}
in_next = CLI_GET_CHAR(in_next, bufend, ch);
if (ch == '"')
{ /* double quote, one goes in string, still have quote */
out_sp = CLI_PUT_CHAR(out_sp, ch);
in_next = CLI_GET_CHAR(in_next, bufend, ch);
token_len++;
} else
have_quote = FALSE;
}
} else
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
in_next = CLI_GET_CHAR(in_next, bufend, ch);
token_len++;
}
}
if (ch == '\0')
{
in_sp = last_in_next; /* Points to start of null char so scan ends next call */
break;
}
if (have_quote)
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
in_next = CLI_GET_CHAR(in_next, bufend, ch);
token_len++;
continue;
}
in_sp = in_next;
break;
}
ch = 0;
out_sp = CLI_PUT_CHAR(out_sp, ch);
cli_lex_in_ptr->tp = (char *)in_sp;
return (token_len);
}
/*
* -------------------------
* Inintialize lexer
* -------------------------
*/
#ifdef __osf__
/* N.B. If the process is started by mumps, argv passed in from main (in gtm.c) is almost straight from the operating system.
* if the process is started externally (call-ins), argc and argv are 0 and NULL respectively */
#pragma pointer_size (save)
#pragma pointer_size (long)
#endif
void cli_lex_setup (int argc, char **argv)
{
int parmlen, parmindx;
char **parmptr;
#ifdef __osf__
#pragma pointer_size (restore)
#endif
#ifdef KEEP_zOS_EBCDIC
__argvtoascii_a(argc, argv);
#endif
cmd_cnt = argc;
cmd_arg = (char **)argv;
/* Quickly run through the parameters to get a ballpark on the
size of the string needed to store them.
*/
for (parmindx = 1, parmptr = argv, parmlen = 0; parmindx <= argc; parmptr++, parmindx++)
parmlen += STRLEN(*parmptr) + 1;
parmlen = parmlen + PARM_OVHD; /* Extraneous extras, etc. */
parmlen = (parmlen > MAX_LINE ? MAX_LINE : parmlen) + 1;
/* call-ins may repeatedly initialize cli_lex_setup for every invocation of gtm_init() */
if (!cli_lex_in_ptr || parmlen > cli_lex_in_ptr->buflen)
{ /* We have the cure for a missing or unusable buffer */
if (cli_lex_in_ptr)
free(cli_lex_in_ptr);
cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + parmlen);
cli_lex_in_ptr->buflen = parmlen;
}
cli_lex_in_ptr->argc = argc;
cli_lex_in_ptr->argv = argv;
cli_lex_in_ptr->in_str[0] = '\0';
cli_lex_in_ptr->tp = NULL;
}
void cli_str_setup(int addrlen, char *addr)
{
int alloclen;
assert(cli_lex_in_ptr);
alloclen = (addrlen > MAX_LINE ? MAX_LINE : addrlen) + 1;
if (!cli_lex_in_ptr || alloclen > cli_lex_in_ptr->buflen)
{ /* We have the cure for a missing or unusable buffer */
if (cli_lex_in_ptr)
free(cli_lex_in_ptr);
cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + alloclen);
cli_lex_in_ptr->buflen = alloclen;
}
cli_lex_in_ptr->argv = NULL;
cli_lex_in_ptr->argc = 0;
cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str;
addrlen = MIN(addrlen, alloclen - 1);
memcpy(cli_lex_in_ptr->in_str, addr, addrlen);
(cli_lex_in_ptr->in_str)[addrlen] = '\0';
}
/*
* ---------------------------------------------------------------
* Convert string to upper case. Do it only for ascii characters.
* ---------------------------------------------------------------
*/
void cli_strupper(char *sp)
{
int c;
while (c = *sp)
*sp++ = IS_ASCII(c) ? CLI_TOUPPER(c) : c;
}
/*
* -------------------------------------------------------
* Check if string is a Hex number
*
* Return:
* TRUE - identifier
* FALSE - otherwise
* -------------------------------------------------------
*/
int cli_is_hex(char *p)
{
if (('+' == *p) || ('-' == *p))
p++;
if (('0' == *p) && ('X' == CLI_TOUPPER(*(p + 1))))
{
p = p + 2;
}
while (*p && ISXDIGIT_ASCII(*p))
p++;
return ((*p) ? FALSE : TRUE);
}
/*
* -------------------------------------------------------
* Check if token is a qualifier
*
* Return:
* TRUE - qualifier
* FALSE - otherwise
* -------------------------------------------------------
*/
int cli_is_qualif(char *p)
{
return (*p == '-');
}
/*
* -------------------------------------------------------
* Check if token is an assignment symbol
*
* Return:
* TRUE - assignment
* FALSE - otherwise
* -------------------------------------------------------
*/
int cli_is_assign(char *p)
{
return (*p == '=');
}
/* ----------------------------------------------
* Routine to skip white space while reading.
* Called when a parameter has to be read.
* The tok_string_extract () doesnt remove
* starting spaces while reading a string.
* To make use of that while reading a parameter
* this has to be called first.
* ----------------------------------------------
*/
void skip_white_space(void)
{
uchar_ptr_t in_sp;
#ifdef UNICODE_SUPPORTED
wint_t ch;
uchar_ptr_t next_sp, bufend;
#endif
assert(cli_lex_in_ptr);
in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp;
#ifdef UNICODE_SUPPORTED
if (gtm_utf8_mode)
{
bufend = (uchar_ptr_t)(cli_lex_in_ptr->in_str + cli_lex_in_ptr->buflen);
for ( ; ; )
{
next_sp = UTF8_MBTOWC(in_sp, bufend, ch);
if (!U_ISSPACE(ch))
break;
in_sp = next_sp;
}
}
else
#endif
while(ISSPACE_ASCII((int)*in_sp))
in_sp++;
cli_lex_in_ptr->tp = (char *)in_sp;
}
/*
* --------------------------------------------
* Extract one token from a string.
* Token is anything between the separator characters
* or separator character itself, if it is '-' or '='.
*
* Return:
* token Length
* --------------------------------------------
*/
static int tok_extract (void)
{
int token_len;
uchar_ptr_t in_sp, in_next, out_sp, bufend;
#ifdef UNICODE_SUPPORTED
wint_t ch;
#else
int ch;
#endif
assert(cli_lex_in_ptr);
skip_white_space(); /* Skip leading blanks */
in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp;
bufend = (uchar_ptr_t)&cli_lex_in_ptr->in_str[0] + cli_lex_in_ptr->buflen;
out_sp = (uchar_ptr_t)cli_token_buf;
token_len = 0;
in_next = CLI_GET_CHAR(in_sp, bufend, ch);
if ('-' == ch || '=' == ch)
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
in_sp = in_next; /* advance one character */
token_len = 1;
} else if (ch) /* only if something there */
{
/* smw if quotable, need to unicode isspace (BYPASSOK) */
/* '-' is not a token separator */
while(ch && !CLI_ISSPACE(ch)
&& ch != '=')
{
out_sp = CLI_PUT_CHAR(out_sp, ch);
in_sp = in_next;
in_next = CLI_GET_CHAR(in_next, bufend, ch);
token_len++;
}
}
ch = 0;
out_sp = CLI_PUT_CHAR(out_sp, ch);
cli_lex_in_ptr->tp = (char *)in_sp;
return(token_len);
}
static void cli_lex_in_expand(int in_len)
{
IN_PARMS *new_cli_lex_in_ptr;
new_cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + in_len);
new_cli_lex_in_ptr->argc = cli_lex_in_ptr->argc;
new_cli_lex_in_ptr->argv = cli_lex_in_ptr->argv;
new_cli_lex_in_ptr->buflen = in_len; /* in_str[1] accounts for null */
free(cli_lex_in_ptr);
cli_lex_in_ptr = new_cli_lex_in_ptr;
}
char *cli_fgets(char *buffer, int buffersize, FILE *fp, boolean_t cli_lex_str)
{
size_t in_len;
char cli_fgets_buffer[MAX_LINE], *destbuffer, *retptr;
#ifdef UNICODE_SUPPORTED
int mbc_len, u16_off, destsize;
int32_t mbc_dest_len;
UErrorCode errorcode;
UChar *uc_fgets_ret;
UChar32 uc32_cp;
UChar cli_fgets_Ubuffer[MAX_LINE];
UFILE *u_fp;
#endif
#ifdef UNICODE_SUPPORTED
if (gtm_utf8_mode)
{
cli_fgets_Ubuffer[0] = 0;
if (!cli_lex_str)
assert(MAX_LINE >= buffersize);
u_fp = u_finit(fp, NULL, UTF8_NAME);
if (NULL != u_fp)
{
do
{ /* no f_ferror */
uc_fgets_ret = u_fgets(cli_fgets_Ubuffer,
(int32_t)(SIZEOF(cli_fgets_Ubuffer) / SIZEOF(UChar)) - 1, u_fp);
} while (NULL == uc_fgets_ret && !u_feof(u_fp) && ferror(fp) && EINTR == errno);
if (NULL == uc_fgets_ret)
{
if (cli_lex_str)
cli_lex_in_ptr->tp = NULL;
u_fclose(u_fp);
return NULL;
}
in_len = u_strlen(cli_fgets_Ubuffer);
in_len = trim_U16_line_term(cli_fgets_Ubuffer, (int)in_len);
for (u16_off = 0, mbc_len = 0; u16_off < in_len; )
{
U16_NEXT(cli_fgets_Ubuffer, u16_off, in_len, uc32_cp);
mbc_len += U8_LENGTH(uc32_cp);
if (!cli_lex_str && mbc_len >= buffersize)
{ /* can't expand */
mbc_len = buffersize - 1;
cli_fgets_Ubuffer[u16_off] = 0;
U16_BACK_1(cli_fgets_Ubuffer, 0, u16_off);
in_len = u16_off + 1; /* offset to length */
break;
}
}
if (cli_lex_str)
{
if (mbc_len > cli_lex_in_ptr->buflen)
cli_lex_in_expand(mbc_len); /* for terminating null */
destsize = cli_lex_in_ptr->buflen + 1;
destbuffer = cli_lex_in_ptr->in_str;
} else
{ /* very unlikely parm is larger than MAX_LINE even i UTF-8 */
if (mbc_len >= buffersize)
destsize = buffersize - 1; /* for null */
else
destsize = buffersize;
destbuffer = buffer;
}
errorcode = U_ZERO_ERROR;
u_strToUTF8(destbuffer, destsize, &mbc_dest_len, cli_fgets_Ubuffer, (int4)in_len + 1, &errorcode);
if (U_FAILURE(errorcode))
if (U_BUFFER_OVERFLOW_ERROR == errorcode)
{ /* truncate so null terminated */
destbuffer[destsize - 1] = 0;
retptr = destbuffer;
} else
retptr = NULL;
else
retptr = destbuffer; /* Repoint to new home */
if (cli_lex_str)
cli_lex_in_ptr->tp = retptr;
u_fclose(u_fp);
} else if (cli_lex_str)
cli_lex_in_ptr->tp = NULL;
} else
{
#endif
cli_fgets_buffer[0] = '\0';
FGETS_FILE(cli_fgets_buffer, SIZEOF(cli_fgets_buffer), fp, retptr);
if (NULL != retptr)
{
in_len = strlen(cli_fgets_buffer);
if (cli_lex_str)
{
if (cli_lex_in_ptr->buflen < in_len)
cli_lex_in_expand((int)in_len);
destbuffer = cli_lex_in_ptr->in_str;
} else
{
assert(SIZEOF(cli_fgets_buffer) >= buffersize);
destbuffer = buffer;
}
retptr = destbuffer; /* return proper buffer */
if ('\n' == cli_fgets_buffer[in_len - 1])
cli_fgets_buffer[in_len - 1] = '\0'; /* replace NL */
memcpy(destbuffer, cli_fgets_buffer, in_len);
if (cli_lex_str)
cli_lex_in_ptr->tp = destbuffer;
} else if (cli_lex_str)
cli_lex_in_ptr->tp = NULL;
#ifdef UNICODE_SUPPORTED
}
#endif
return retptr;
}
/*
* -------------------------------------------------------
* Get token
*
* Return:
* Token Length
*
* Side effects:
* set eof to <> 0 for EOF condition.
* -------------------------------------------------------
*/
int cli_gettoken (int *eof)
{
int arg_no, token_len, in_len;
char *from, *to;
IN_PARMS *new_cli_lex_in_ptr;
char *tmp_tp;
assert(cli_lex_in_ptr);
/* Reading from program argument list */
if (cli_lex_in_ptr->argc > 1 && cli_lex_in_ptr->tp == 0)
{
cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str;
arg_no = 1;
/* convert arguments into array */
while(arg_no < cli_lex_in_ptr->argc)
{
if (arg_no > 1)
strcat(cli_lex_in_ptr->in_str, " ");
if (strlen(cli_lex_in_ptr->in_str)
+ strlen(cli_lex_in_ptr->argv[arg_no]) > MAX_LINE)
break;
strcat(cli_lex_in_ptr->in_str, cli_lex_in_ptr->argv[arg_no++]);
}
}
if (NULL == cli_lex_in_ptr->tp || strlen(cli_lex_in_ptr->tp) < 1)
{
cli_token_buf[0] = '\0';
/* cli_fgets can malloc/free cli_lex_in_ptr. Passing in TRUE as last parameter will do the set
* to cli_lex_in_ptr->tp within cli_fgets() after any malloc/free, thus avoiding the problem of
* writing to freed memory if the set were done here.
*/
cli_fgets(cli_lex_in_ptr->in_str, MAX_LINE, stdin, TRUE);
if (NULL != cli_lex_in_ptr->tp)
*eof = 0;
else
{
*eof = EOF;
return (0);
}
}
token_len = tok_extract();
*eof = (cli_lex_in_ptr->argc > 1 && token_len == 0);
return token_len;
}
/*
* --------------------------------------------
* Copy next token to the token buffer.
* Do not advance the token pointer.
*
* Return:
* Token Length
*
* Side effects:
* set eof to <> 0 for EOF condition.
* -------------------------------------------------------
*/
int cli_look_next_token(int *eof)
{
int tok_len;
char *old_tp;
assert(cli_lex_in_ptr);
if (((char *) NULL == cli_lex_in_ptr->tp) || (!strlen(cli_lex_in_ptr->tp)))
return(0);
old_tp = cli_lex_in_ptr->tp;
tok_len = cli_gettoken(eof);
cli_lex_in_ptr->tp = old_tp;
return(tok_len);
}
int cli_look_next_string_token(int *eof)
{
int tok_len;
char *old_tp;
assert(cli_lex_in_ptr);
if (!strlen(cli_lex_in_ptr->tp))
return(0);
old_tp = cli_lex_in_ptr->tp;
tok_len = cli_get_string_token(eof);
cli_lex_in_ptr->tp = old_tp;
return(tok_len);
}
int cli_get_string_token(int *eof)
{
int arg_no, token_len, in_len;
char *from, *to;
IN_PARMS *new_cli_lex_in_ptr;
assert(cli_lex_in_ptr);
/* Reading from program argument list */
if (cli_lex_in_ptr->argc > 1 && cli_lex_in_ptr->tp == 0)
{
cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str;
arg_no = 1;
/* convert arguments into array */
while(arg_no < cli_lex_in_ptr->argc)
{
if (arg_no > 1)
strcat(cli_lex_in_ptr->in_str, " ");
if (strlen(cli_lex_in_ptr->in_str) + strlen(cli_lex_in_ptr->argv[arg_no]) > MAX_LINE)
break;
if (cli_has_space(cli_lex_in_ptr->argv[arg_no]))
{
from = cli_lex_in_ptr->argv[arg_no++];
to = cli_lex_in_ptr->in_str + strlen(cli_lex_in_ptr->in_str) - 1;
*to++ = '\"';
while(*from != '\0')
{
if ('\"' == *from)
*to++ = *from;
*to++ = *from++;
}
*to++ = '\"';
*to = '\0';
} else
strcat(cli_lex_in_ptr->in_str, cli_lex_in_ptr->argv[arg_no++]);
}
}
if (NULL == cli_lex_in_ptr->tp || strlen(cli_lex_in_ptr->tp) < 1)
{
cli_token_buf[0] = '\0';
/* cli_fgets can malloc/free cli_lex_in_ptr. Passing in TRUE as last parameter will do the set
* to cli_lex_in_ptr->tp within cli_fgets() after any malloc/free, thus avoiding the problem of
* writing to freed memory if the set were done here.
*/
cli_fgets(cli_lex_in_ptr->in_str, MAX_LINE, stdin, TRUE);
if (NULL != cli_lex_in_ptr->tp)
*eof = 0;
else
{
*eof = EOF;
return (0);
}
}
token_len = tok_string_extract();
*eof = (cli_lex_in_ptr->argc > 1 && token_len == 0);
return token_len;
}
/*
* -------------------------------------------------------
* Check if string has space in it
*
* Return:
* TRUE - identifier
* FALSE - otherwise
* -------------------------------------------------------
*/
int cli_has_space(char *p)
{
#ifdef UNICODE_SUPPORTED
uchar_ptr_t local_p, next_p, bufend;
wint_t ch;
if (gtm_utf8_mode)
{
local_p = (uchar_ptr_t)p;
bufend = local_p + strlen(p);
while (local_p)
{
next_p = UTF8_MBTOWC(local_p, bufend, ch);
if (!ch || U_ISSPACE(ch))
break;
local_p = next_p;
}
p = (char *)local_p;
}
else
#endif
while (*p && !ISSPACE_ASCII(*p))
p++;
return ((*p) ? (TRUE) : (FALSE));
}