/****************************************************************
 *								*
 *	Copyright 2001, 2013 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

/*
 * -----------------------------------------------------
 * Lexical analyzer routines for command line interpreter
 * -----------------------------------------------------
 */

#include "mdef.h"

#include "gtm_ctype.h"
#include <errno.h>	/* errno and EINTR are used by cli_fgets() */
#include "gtm_stdio.h"
#include "gtm_string.h"
#ifdef UNICODE_SUPPORTED
#include "gtm_icu_api.h"
#include "gtm_utf8.h"
#endif

#include "cli.h"
#include "eintr_wrappers.h"
#include "min_max.h"

GBLDEF char		cli_token_buf[MAX_LINE + 1];	/* Token buffer */
GBLREF int		cmd_cnt;
GBLREF char		**cmd_arg;
GBLDEF boolean_t	gtm_cli_interpret_string = TRUE;
GBLDEF IN_PARMS		*cli_lex_in_ptr;

/* Character access helpers used by the tokenizers.
 *	CLI_GET_CHAR(PTR, BUFEND, CHAR)	stores the character found at PTR into CHAR and evaluates to the address just past it.
 *	CLI_PUT_CHAR(PTR, CHAR)		stores CHAR at PTR and evaluates to the address just past what was stored.
 *	CLI_ISSPACE(CHAR)		is non-zero if CHAR is white space.
 * When UNICODE_SUPPORTED is defined and gtm_utf8_mode is set, the work is delegated to UTF8_MBTOWC, UTF8_WCTOMB and
 * U_ISSPACE; otherwise each character is exactly one byte.
 * Note that the arguments can be evaluated more than once, so they must be free of side effects.
 */
#ifdef UNICODE_SUPPORTED
GBLREF boolean_t	gtm_utf8_mode;
#define CLI_GET_CHAR(PTR, BUFEND, CHAR) (gtm_utf8_mode ? UTF8_MBTOWC(PTR, BUFEND, CHAR) : (CHAR = (wint_t)*(PTR), (PTR) + 1))
#define CLI_PUT_CHAR(PTR, CHAR) (gtm_utf8_mode ? UTF8_WCTOMB(CHAR, PTR) : (*(PTR) = CHAR, (PTR) + 1))
#define CLI_ISSPACE(CHAR) (gtm_utf8_mode ? U_ISSPACE(CHAR) : ISSPACE_ASCII((int)CHAR))
#else
#define CLI_GET_CHAR(PTR, BUFEND, CHAR) (CHAR = (int)*(PTR), (PTR) + 1)
#define CLI_PUT_CHAR(PTR, CHAR) (*(PTR) = CHAR, (PTR) + 1)
#define CLI_ISSPACE(CHAR) ISSPACE_ASCII(CHAR)
#endif

/* Don't use toupper() because, with Turkish unicode settings, toupper('i') does not have well-defined behavior. On some platforms
 * it returns back 'i' itself. This is because, in Turkish, the actual uppercase version of 'i' is 'I' with a dot on top, which is
 * not an ascii character. Thus cli_strupper would incorrectly convert some qualifiers, resulting in CLIERR errors.
For example it * would convert "-dynamic_literals" to "-DYNAMiC_LiTERALS" or "-warnings" to "-WARNiNGS". */ #define CLI_TOUPPER(C) (('a' <= (C) && (C) <= 'z') ? ((C) + ('A' - 'a')) : (C)) static int tok_string_extract(void) { int token_len; boolean_t have_quote, first_quote; uchar_ptr_t in_sp, out_sp, in_next, last_in_next, bufend; /* really one past last byte of buffer */ #ifdef UNICODE_SUPPORTED wint_t ch; #else int ch; #endif assert(cli_lex_in_ptr); in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp; bufend = (uchar_ptr_t)&cli_lex_in_ptr->in_str[0] + cli_lex_in_ptr->buflen; out_sp = (uchar_ptr_t)cli_token_buf; token_len = 0; have_quote = FALSE; in_next = CLI_GET_CHAR(in_sp, bufend, ch); for ( ; ;) { /* '-' is not a token separator */ while (ch && !CLI_ISSPACE(ch)) { last_in_next = in_next; if (ch == '"') { if (!have_quote) { if (!gtm_cli_interpret_string) { out_sp = CLI_PUT_CHAR(out_sp, ch); token_len++; } have_quote = TRUE; in_next = CLI_GET_CHAR(in_next, bufend, ch); } else { if (!gtm_cli_interpret_string) { out_sp = CLI_PUT_CHAR(out_sp, ch); token_len++; } in_next = CLI_GET_CHAR(in_next, bufend, ch); if (ch == '"') { /* double quote, one goes in string, still have quote */ out_sp = CLI_PUT_CHAR(out_sp, ch); in_next = CLI_GET_CHAR(in_next, bufend, ch); token_len++; } else have_quote = FALSE; } } else { out_sp = CLI_PUT_CHAR(out_sp, ch); in_next = CLI_GET_CHAR(in_next, bufend, ch); token_len++; } } if (ch == '\0') { in_sp = last_in_next; /* Points to start of null char so scan ends next call */ break; } if (have_quote) { out_sp = CLI_PUT_CHAR(out_sp, ch); in_next = CLI_GET_CHAR(in_next, bufend, ch); token_len++; continue; } in_sp = in_next; break; } ch = 0; out_sp = CLI_PUT_CHAR(out_sp, ch); cli_lex_in_ptr->tp = (char *)in_sp; return (token_len); } /* * ------------------------- * Inintialize lexer * ------------------------- */ #ifdef __osf__ /* N.B. If the process is started by mumps, argv passed in from main (in gtm.c) is almost straight from the operating system. 
* if the process is started externally (call-ins), argc and argv are 0 and NULL respectively */ #pragma pointer_size (save) #pragma pointer_size (long) #endif void cli_lex_setup (int argc, char **argv) { int parmlen, parmindx; char **parmptr; #ifdef __osf__ #pragma pointer_size (restore) #endif #ifdef KEEP_zOS_EBCDIC __argvtoascii_a(argc, argv); #endif cmd_cnt = argc; cmd_arg = (char **)argv; /* Quickly run through the parameters to get a ballpark on the size of the string needed to store them. */ for (parmindx = 1, parmptr = argv, parmlen = 0; parmindx <= argc; parmptr++, parmindx++) parmlen += STRLEN(*parmptr) + 1; parmlen = parmlen + PARM_OVHD; /* Extraneous extras, etc. */ parmlen = (parmlen > MAX_LINE ? MAX_LINE : parmlen) + 1; /* call-ins may repeatedly initialize cli_lex_setup for every invocation of gtm_init() */ if (!cli_lex_in_ptr || parmlen > cli_lex_in_ptr->buflen) { /* We have the cure for a missing or unusable buffer */ if (cli_lex_in_ptr) free(cli_lex_in_ptr); cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + parmlen); cli_lex_in_ptr->buflen = parmlen; } cli_lex_in_ptr->argc = argc; cli_lex_in_ptr->argv = argv; cli_lex_in_ptr->in_str[0] = '\0'; cli_lex_in_ptr->tp = NULL; } void cli_str_setup(int addrlen, char *addr) { int alloclen; assert(cli_lex_in_ptr); alloclen = (addrlen > MAX_LINE ? MAX_LINE : addrlen) + 1; if (!cli_lex_in_ptr || alloclen > cli_lex_in_ptr->buflen) { /* We have the cure for a missing or unusable buffer */ if (cli_lex_in_ptr) free(cli_lex_in_ptr); cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + alloclen); cli_lex_in_ptr->buflen = alloclen; } cli_lex_in_ptr->argv = NULL; cli_lex_in_ptr->argc = 0; cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str; addrlen = MIN(addrlen, alloclen - 1); memcpy(cli_lex_in_ptr->in_str, addr, addrlen); (cli_lex_in_ptr->in_str)[addrlen] = '\0'; } /* * --------------------------------------------------------------- * Convert string to upper case. Do it only for ascii characters. 
* --------------------------------------------------------------- */ void cli_strupper(char *sp) { int c; while (c = *sp) *sp++ = IS_ASCII(c) ? CLI_TOUPPER(c) : c; } /* * ------------------------------------------------------- * Check if string is a Hex number * * Return: * TRUE - identifier * FALSE - otherwise * ------------------------------------------------------- */ int cli_is_hex(char *p) { if (('+' == *p) || ('-' == *p)) p++; if (('0' == *p) && ('X' == CLI_TOUPPER(*(p + 1)))) { p = p + 2; } while (*p && ISXDIGIT_ASCII(*p)) p++; return ((*p) ? FALSE : TRUE); } /* * ------------------------------------------------------- * Check if token is a qualifier * * Return: * TRUE - qualifier * FALSE - otherwise * ------------------------------------------------------- */ int cli_is_qualif(char *p) { return (*p == '-'); } /* * ------------------------------------------------------- * Check if token is an assignment symbol * * Return: * TRUE - assignment * FALSE - otherwise * ------------------------------------------------------- */ int cli_is_assign(char *p) { return (*p == '='); } /* ---------------------------------------------- * Routine to skip white space while reading. * Called when a parameter has to be read. * The tok_string_extract () doesnt remove * starting spaces while reading a string. * To make use of that while reading a parameter * this has to be called first. 
* ---------------------------------------------- */ void skip_white_space(void) { uchar_ptr_t in_sp; #ifdef UNICODE_SUPPORTED wint_t ch; uchar_ptr_t next_sp, bufend; #endif assert(cli_lex_in_ptr); in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp; #ifdef UNICODE_SUPPORTED if (gtm_utf8_mode) { bufend = (uchar_ptr_t)(cli_lex_in_ptr->in_str + cli_lex_in_ptr->buflen); for ( ; ; ) { next_sp = UTF8_MBTOWC(in_sp, bufend, ch); if (!U_ISSPACE(ch)) break; in_sp = next_sp; } } else #endif while(ISSPACE_ASCII((int)*in_sp)) in_sp++; cli_lex_in_ptr->tp = (char *)in_sp; } /* * -------------------------------------------- * Extract one token from a string. * Token is anything between the separator characters * or separator character itself, if it is '-' or '='. * * Return: * token Length * -------------------------------------------- */ static int tok_extract (void) { int token_len; uchar_ptr_t in_sp, in_next, out_sp, bufend; #ifdef UNICODE_SUPPORTED wint_t ch; #else int ch; #endif assert(cli_lex_in_ptr); skip_white_space(); /* Skip leading blanks */ in_sp = (uchar_ptr_t)cli_lex_in_ptr->tp; bufend = (uchar_ptr_t)&cli_lex_in_ptr->in_str[0] + cli_lex_in_ptr->buflen; out_sp = (uchar_ptr_t)cli_token_buf; token_len = 0; in_next = CLI_GET_CHAR(in_sp, bufend, ch); if ('-' == ch || '=' == ch) { out_sp = CLI_PUT_CHAR(out_sp, ch); in_sp = in_next; /* advance one character */ token_len = 1; } else if (ch) /* only if something there */ { /* smw if quotable, need to unicode isspace (BYPASSOK) */ /* '-' is not a token separator */ while(ch && !CLI_ISSPACE(ch) && ch != '=') { out_sp = CLI_PUT_CHAR(out_sp, ch); in_sp = in_next; in_next = CLI_GET_CHAR(in_next, bufend, ch); token_len++; } } ch = 0; out_sp = CLI_PUT_CHAR(out_sp, ch); cli_lex_in_ptr->tp = (char *)in_sp; return(token_len); } static void cli_lex_in_expand(int in_len) { IN_PARMS *new_cli_lex_in_ptr; new_cli_lex_in_ptr = (IN_PARMS *)malloc(SIZEOF(IN_PARMS) + in_len); new_cli_lex_in_ptr->argc = cli_lex_in_ptr->argc; new_cli_lex_in_ptr->argv = 
cli_lex_in_ptr->argv; new_cli_lex_in_ptr->buflen = in_len; /* in_str[1] accounts for null */ free(cli_lex_in_ptr); cli_lex_in_ptr = new_cli_lex_in_ptr; } char *cli_fgets(char *buffer, int buffersize, FILE *fp, boolean_t cli_lex_str) { size_t in_len; char cli_fgets_buffer[MAX_LINE], *destbuffer, *retptr; #ifdef UNICODE_SUPPORTED int mbc_len, u16_off, destsize; int32_t mbc_dest_len; UErrorCode errorcode; UChar *uc_fgets_ret; UChar32 uc32_cp; UChar cli_fgets_Ubuffer[MAX_LINE]; UFILE *u_fp; #endif #ifdef UNICODE_SUPPORTED if (gtm_utf8_mode) { cli_fgets_Ubuffer[0] = 0; if (!cli_lex_str) assert(MAX_LINE >= buffersize); u_fp = u_finit(fp, NULL, UTF8_NAME); if (NULL != u_fp) { do { /* no f_ferror */ uc_fgets_ret = u_fgets(cli_fgets_Ubuffer, (int32_t)(SIZEOF(cli_fgets_Ubuffer) / SIZEOF(UChar)) - 1, u_fp); } while (NULL == uc_fgets_ret && !u_feof(u_fp) && ferror(fp) && EINTR == errno); if (NULL == uc_fgets_ret) { if (cli_lex_str) cli_lex_in_ptr->tp = NULL; u_fclose(u_fp); return NULL; } in_len = u_strlen(cli_fgets_Ubuffer); in_len = trim_U16_line_term(cli_fgets_Ubuffer, (int)in_len); for (u16_off = 0, mbc_len = 0; u16_off < in_len; ) { U16_NEXT(cli_fgets_Ubuffer, u16_off, in_len, uc32_cp); mbc_len += U8_LENGTH(uc32_cp); if (!cli_lex_str && mbc_len >= buffersize) { /* can't expand */ mbc_len = buffersize - 1; cli_fgets_Ubuffer[u16_off] = 0; U16_BACK_1(cli_fgets_Ubuffer, 0, u16_off); in_len = u16_off + 1; /* offset to length */ break; } } if (cli_lex_str) { if (mbc_len > cli_lex_in_ptr->buflen) cli_lex_in_expand(mbc_len); /* for terminating null */ destsize = cli_lex_in_ptr->buflen + 1; destbuffer = cli_lex_in_ptr->in_str; } else { /* very unlikely parm is larger than MAX_LINE even i UTF-8 */ if (mbc_len >= buffersize) destsize = buffersize - 1; /* for null */ else destsize = buffersize; destbuffer = buffer; } errorcode = U_ZERO_ERROR; u_strToUTF8(destbuffer, destsize, &mbc_dest_len, cli_fgets_Ubuffer, (int4)in_len + 1, &errorcode); if (U_FAILURE(errorcode)) if 
(U_BUFFER_OVERFLOW_ERROR == errorcode) { /* truncate so null terminated */ destbuffer[destsize - 1] = 0; retptr = destbuffer; } else retptr = NULL; else retptr = destbuffer; /* Repoint to new home */ if (cli_lex_str) cli_lex_in_ptr->tp = retptr; u_fclose(u_fp); } else if (cli_lex_str) cli_lex_in_ptr->tp = NULL; } else { #endif cli_fgets_buffer[0] = '\0'; FGETS_FILE(cli_fgets_buffer, SIZEOF(cli_fgets_buffer), fp, retptr); if (NULL != retptr) { in_len = strlen(cli_fgets_buffer); if (cli_lex_str) { if (cli_lex_in_ptr->buflen < in_len) cli_lex_in_expand((int)in_len); destbuffer = cli_lex_in_ptr->in_str; } else { assert(SIZEOF(cli_fgets_buffer) >= buffersize); destbuffer = buffer; } retptr = destbuffer; /* return proper buffer */ if ('\n' == cli_fgets_buffer[in_len - 1]) cli_fgets_buffer[in_len - 1] = '\0'; /* replace NL */ memcpy(destbuffer, cli_fgets_buffer, in_len); if (cli_lex_str) cli_lex_in_ptr->tp = destbuffer; } else if (cli_lex_str) cli_lex_in_ptr->tp = NULL; #ifdef UNICODE_SUPPORTED } #endif return retptr; } /* * ------------------------------------------------------- * Get token * * Return: * Token Length * * Side effects: * set eof to <> 0 for EOF condition. * ------------------------------------------------------- */ int cli_gettoken (int *eof) { int arg_no, token_len, in_len; char *from, *to; IN_PARMS *new_cli_lex_in_ptr; char *tmp_tp; assert(cli_lex_in_ptr); /* Reading from program argument list */ if (cli_lex_in_ptr->argc > 1 && cli_lex_in_ptr->tp == 0) { cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str; arg_no = 1; /* convert arguments into array */ while(arg_no < cli_lex_in_ptr->argc) { if (arg_no > 1) strcat(cli_lex_in_ptr->in_str, " "); if (strlen(cli_lex_in_ptr->in_str) + strlen(cli_lex_in_ptr->argv[arg_no]) > MAX_LINE) break; strcat(cli_lex_in_ptr->in_str, cli_lex_in_ptr->argv[arg_no++]); } } if (NULL == cli_lex_in_ptr->tp || strlen(cli_lex_in_ptr->tp) < 1) { cli_token_buf[0] = '\0'; /* cli_fgets can malloc/free cli_lex_in_ptr. 
Passing in TRUE as last parameter will do the set * to cli_lex_in_ptr->tp within cli_fgets() after any malloc/free, thus avoiding the problem of * writing to freed memory if the set were done here. */ cli_fgets(cli_lex_in_ptr->in_str, MAX_LINE, stdin, TRUE); if (NULL != cli_lex_in_ptr->tp) *eof = 0; else { *eof = EOF; return (0); } } token_len = tok_extract(); *eof = (cli_lex_in_ptr->argc > 1 && token_len == 0); return token_len; } /* * -------------------------------------------- * Copy next token to the token buffer. * Do not advance the token pointer. * * Return: * Token Length * * Side effects: * set eof to <> 0 for EOF condition. * ------------------------------------------------------- */ int cli_look_next_token(int *eof) { int tok_len; char *old_tp; assert(cli_lex_in_ptr); if (((char *) NULL == cli_lex_in_ptr->tp) || (!strlen(cli_lex_in_ptr->tp))) return(0); old_tp = cli_lex_in_ptr->tp; tok_len = cli_gettoken(eof); cli_lex_in_ptr->tp = old_tp; return(tok_len); } int cli_look_next_string_token(int *eof) { int tok_len; char *old_tp; assert(cli_lex_in_ptr); if (!strlen(cli_lex_in_ptr->tp)) return(0); old_tp = cli_lex_in_ptr->tp; tok_len = cli_get_string_token(eof); cli_lex_in_ptr->tp = old_tp; return(tok_len); } int cli_get_string_token(int *eof) { int arg_no, token_len, in_len; char *from, *to; IN_PARMS *new_cli_lex_in_ptr; assert(cli_lex_in_ptr); /* Reading from program argument list */ if (cli_lex_in_ptr->argc > 1 && cli_lex_in_ptr->tp == 0) { cli_lex_in_ptr->tp = cli_lex_in_ptr->in_str; arg_no = 1; /* convert arguments into array */ while(arg_no < cli_lex_in_ptr->argc) { if (arg_no > 1) strcat(cli_lex_in_ptr->in_str, " "); if (strlen(cli_lex_in_ptr->in_str) + strlen(cli_lex_in_ptr->argv[arg_no]) > MAX_LINE) break; if (cli_has_space(cli_lex_in_ptr->argv[arg_no])) { from = cli_lex_in_ptr->argv[arg_no++]; to = cli_lex_in_ptr->in_str + strlen(cli_lex_in_ptr->in_str) - 1; *to++ = '\"'; while(*from != '\0') { if ('\"' == *from) *to++ = *from; *to++ = *from++; } 
*to++ = '\"'; *to = '\0'; } else strcat(cli_lex_in_ptr->in_str, cli_lex_in_ptr->argv[arg_no++]); } } if (NULL == cli_lex_in_ptr->tp || strlen(cli_lex_in_ptr->tp) < 1) { cli_token_buf[0] = '\0'; /* cli_fgets can malloc/free cli_lex_in_ptr. Passing in TRUE as last parameter will do the set * to cli_lex_in_ptr->tp within cli_fgets() after any malloc/free, thus avoiding the problem of * writing to freed memory if the set were done here. */ cli_fgets(cli_lex_in_ptr->in_str, MAX_LINE, stdin, TRUE); if (NULL != cli_lex_in_ptr->tp) *eof = 0; else { *eof = EOF; return (0); } } token_len = tok_string_extract(); *eof = (cli_lex_in_ptr->argc > 1 && token_len == 0); return token_len; } /* * ------------------------------------------------------- * Check if string has space in it * * Return: * TRUE - identifier * FALSE - otherwise * ------------------------------------------------------- */ int cli_has_space(char *p) { #ifdef UNICODE_SUPPORTED uchar_ptr_t local_p, next_p, bufend; wint_t ch; if (gtm_utf8_mode) { local_p = (uchar_ptr_t)p; bufend = local_p + strlen(p); while (local_p) { next_p = UTF8_MBTOWC(local_p, bufend, ch); if (!ch || U_ISSPACE(ch)) break; local_p = next_p; } p = (char *)local_p; } else #endif while (*p && !ISSPACE_ASCII(*p)) p++; return ((*p) ? (TRUE) : (FALSE)); }