560 lines
16 KiB
C
560 lines
16 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2009 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
/* This facility loads a pattern table from the file specified.
|
|
Returns success as a Boolean. A sample pattern table definition
|
|
file follows:
|
|
|
|
PATSTART
|
|
PATTABLE EDM
|
|
PATCODE c
|
|
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,-
|
|
24,25,26,27,28,29,30,31,127,128,129,130,131,132,133,134,135,136,-
|
|
137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,-
|
|
153,154,155,156,157,158,159,255
|
|
PATCODE n
|
|
48,49,50,51,52,53,54,55,56,57
|
|
PATCODE u
|
|
65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,-
|
|
86,87,88,89,90,192,193,194,195,196,197,198,199,200,201,202,203,-
|
|
204,205,206,207,209,210,211,212,213,214,215,216,217,218,219,220,-
|
|
221
|
|
PATCODE K
|
|
66,67,68,70,71,72,74,75,76,77,78,80,81,82,83,84,86,87,88,89,90,-
|
|
98,99,100,102,103,104,106,107,108,109,110,112,113,114,115,116,-
|
|
118,119,120,121,122
|
|
PATCODE l
|
|
97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,-
|
|
114,115,116,117,118,119,120,121,122,170,186,223,224,225,226,227,-
|
|
228,229,230,231,232,233,234,235,236,237,238,239,241,242,243,244,-
|
|
245,246,247,248,249,250,251,252,253
|
|
PATCODE p
|
|
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,58,59,60,61,62,-
|
|
63,64,91,92,93,94,95,96,123,124,125,126,160,161,162,163,164,165,-
|
|
166,167,168,169,171,172,173,174,175,176,177,178,179,180,181,182,-
|
|
183,184,185,187,188,189,190,191,208,222,240,254
|
|
PATCODE V
|
|
65,69,73,79,85,89,-
|
|
97,101,105,111,117,121
|
|
PATEND
|
|
|
|
Note that this table does not include a definition for pattern codes A and E.
|
|
A is implicitly defined as the union of L and U, and E is implicitly defined as the union of all other classes.
|
|
|
|
; This is a test of the GT.M/I18N user-definable pattern-
|
|
; match table definition.
|
|
;
|
|
patsTaRt
|
|
PattaBLE Example
|
|
PATCODE A ; WARNING: patcodes A and E cannot be re-defined
|
|
42, 46, 43, 75 - ; comments after continuation
|
|
, 5, -
|
|
63, 91, 92, 93 ; comments at end of directive
|
|
PATcode u ; This is an explicit U code definition
|
|
92, 127, 128, 255
|
|
PaTcOdE V ; GT.M specific user-defined code
|
|
102, 104, 109, 121
|
|
patcode YcntY
|
|
65, 69, 73, 79, 85 ; ANSI user-defined patcode
|
|
PaTeNd
|
|
*/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
|
|
#ifdef VMS
|
|
#include <lnmdef.h>
|
|
#include <fab.h>
|
|
#include <rab.h>
|
|
#include <rmsdef.h>
|
|
|
|
#include "vmsdtype.h"
|
|
|
|
#else
|
|
#include "gtm_stdio.h"
|
|
#endif
|
|
|
|
#include "patcode.h"
|
|
#include "iosp.h"
|
|
#include "io.h"
|
|
#ifdef UNIX
|
|
#include "eintr_wrappers.h"
|
|
#endif
|
|
#include "util.h"
|
|
#include "trans_log_name.h"
|
|
#include "gtm_logicals.h"
|
|
|
|
/* Size limits used by the pattern table loader. */
#define MAXPATNAM	256	/* identifier and table-name buffers below are sized from this */
#define MAX_FILE	256

/* Token codes returned by pat_lex(). They start at 256 so that they cannot collide with
 * single characters (such as ','), which pat_lex() returns as their own character value.
 */
enum
{
	T_EOF = 256,	/* end of the definition file */
	T_NL,		/* end of a (non-continued) line */
	T_SYNTAX,
	T_NUMBER,	/* decimal number; value left in "number" */
	T_IDENT,	/* identifier; text left in "ident", length in "idlen" */
	K_PATSTART,	/* keyword PATSTART */
	K_PATEND,	/* keyword PATEND */
	K_PATTABLE,	/* keyword PATTABLE */
	K_PATCODE	/* keyword PATCODE */
};
|
|
|
|
GBLREF uint4 mapbit[];
|
|
GBLREF uint4 pat_allmaskbits;
|
|
LITREF uint4 typemask[PATENTS];
|
|
|
|
GBLREF uint4 *pattern_typemask;
|
|
GBLREF pattern *pattern_list;
|
|
GBLREF pattern *curr_pattern;
|
|
GBLREF pattern mumps_pattern;
|
|
|
|
LITREF unsigned char lower_to_upper_table[];
|
|
|
|
static int pat_linenum = 0;
|
|
static int token;
|
|
static unsigned char ident[MAXPATNAM + 1];
|
|
static int idlen;
|
|
static int number, max_patents;
|
|
static char *ch = NULL;
|
|
static char patline[MAXPATNAM + 2];
|
|
|
|
#ifdef VMS
|
|
static struct FAB fab;
|
|
static struct RAB rab;
|
|
#else
|
|
static FILE *patfile;
|
|
#endif
|
|
|
|
#ifdef DEBUG
|
|
void dump_tables(void);
|
|
#endif
|
|
|
|
static void close_patfile(void);
|
|
static int getaline(void);
|
|
static int open_patfile(int name_len, char *file_name);
|
|
static int pat_lex(void);
|
|
static int patcmp(unsigned char *str1, unsigned char *str2);
|
|
static void pattab_error(int name_len, char *file_name, int linenum);
|
|
|
|
static void close_patfile(void)
|
|
{
|
|
#ifdef VMS
|
|
sys$close(&fab);
|
|
#else
|
|
int fclose_res;
|
|
FCLOSE(patfile, fclose_res);
|
|
#endif
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
void dump_tables(void)
|
|
{
|
|
int mx;
|
|
char mout;
|
|
pattern **patp;
|
|
|
|
for (patp = &pattern_list; NULL != *patp; patp = &(*patp)->flink)
|
|
{
|
|
util_out_print("!/Pattern Table \"!AD\":!/", TRUE, LEN_AND_STR((*patp)->name));
|
|
for (mx = 0; mx < max_patents; mx++)
|
|
{
|
|
if (mx >= 32 && mx < 127)
|
|
{
|
|
mout = mx;
|
|
util_out_print("!3UL: !XL ('!AD')!/", TRUE, mx, (*patp)->typemask[mx], 1, &mout);
|
|
} else
|
|
{
|
|
util_out_print("!3UL: !XL!/", TRUE, mx, (*patp)->typemask[mx]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static int getaline(void)
|
|
{
|
|
int status;
|
|
|
|
#ifdef VMS
|
|
status = sys$get(&rab);
|
|
if (RMS$_EOF == status)
|
|
return 0;
|
|
patline[rab.rab$w_rsz] = '\n';
|
|
patline[rab.rab$w_rsz + 1] = '\0';
|
|
#else
|
|
char *fgets_res;
|
|
if (NULL == FGETS(patline, SIZEOF(patline), patfile, fgets_res))
|
|
return 0;
|
|
#endif
|
|
return 1;
|
|
}
|
|
|
|
/* Describe the name of the currently active pattern table.
 * outname->addr is pointed at the name stored in curr_pattern (no copy is made) and
 * outname->len is set to its recorded length. Always returns 1.
 */
int getpattabnam(mstr *outname)
{
	pattern	*active;

	active = curr_pattern;
	outname->len = active->namlen;
	outname->addr = active->name;
	return 1;
}
|
|
|
|
int initialize_pattern_table(void)
|
|
{
|
|
char buffer[MAX_TRANS_NAME_LEN];
|
|
int status, letter;
|
|
mstr patname, transnam;
|
|
static MSTR_CONST(pat_file, PAT_FILE);
|
|
static MSTR_CONST(pat_table, PAT_TABLE);
|
|
|
|
/* Initialize the pattern/typemask table size. Note that in UTF-8 mode, we
|
|
* only use the lower half of the table (0 - 127). Although we do not extend
|
|
* user-defined patcodes for multi-byte characters, we still need to allow
|
|
* user defined patcodes for the entire ASCII charset (0 - 127) in UTF-8 mode.
|
|
*/
|
|
max_patents = (gtm_utf8_mode ? PATENTS_UTF8 : PATENTS);
|
|
/* Initialize pattern/typemask structures and pat_allmaskbits for default typemask */
|
|
curr_pattern = pattern_list = &mumps_pattern;
|
|
pattern_typemask = mumps_pattern.typemask = (uint4 *)&(typemask[0]);
|
|
for (pat_allmaskbits = 0, letter = 0; letter < max_patents; letter++)
|
|
pat_allmaskbits |= pattern_typemask[letter]; /* used in do_patfixed/do_pattern */
|
|
/* Locate default pattern file and load it. */
|
|
status = TRANS_LOG_NAME(&pat_file, &transnam, buffer, SIZEOF(buffer), do_sendmsg_on_log2long);
|
|
if (SS_NORMAL != status)
|
|
return 0;
|
|
if (!load_pattern_table(transnam.len, transnam.addr))
|
|
return 0;
|
|
/* Establish default pattern table. */
|
|
status = TRANS_LOG_NAME(&pat_table,&transnam,buffer, SIZEOF(buffer), do_sendmsg_on_log2long);
|
|
if (SS_NORMAL != status)
|
|
return 0;
|
|
patname.len = transnam.len;
|
|
patname.addr = transnam.addr;
|
|
return setpattab(&patname);
|
|
}
|
|
|
|
int load_pattern_table(int name_len,char *file_name)
|
|
{
|
|
unsigned char newtabnam[MAXPATNAM + 1], newYZnam[PAT_YZMAXNUM][PAT_YZMAXLEN];
|
|
int code, cmp, cnt, newtable[PATENTS], newnamlen, newYZlen[PAT_YZMAXNUM];
|
|
int newYZnum = -1; /* number of ANSI user-defined patcodes */
|
|
pattern *newpat, **patp ;
|
|
|
|
if (!open_patfile(name_len, file_name))
|
|
return 0;
|
|
pat_linenum = 1;
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
if (K_PATSTART == token)
|
|
{
|
|
if (T_NL != (token = pat_lex()))
|
|
{
|
|
util_out_print("Unrecognized text at end of line", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
while (K_PATTABLE == token)
|
|
{ /* Set up a pattern table record. */
|
|
if (T_IDENT != (token = pat_lex()))
|
|
{
|
|
util_out_print("Identifier expected, found !AD", TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
newnamlen = idlen;
|
|
memcpy(newtabnam, ident, newnamlen + 1);
|
|
if (T_NL != (token = pat_lex()))
|
|
{
|
|
util_out_print("Unrecognized text at end of line", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
/* Process PATCODE directives */
|
|
memset(&newtable[0], 0, max_patents * SIZEOF(newtable[0]));
|
|
for (cnt = 0; cnt < PAT_YZMAXNUM; cnt++)
|
|
newYZlen[cnt] = 0;
|
|
newYZnum = -1;
|
|
while (K_PATCODE == token)
|
|
{
|
|
if (T_IDENT != (token = pat_lex()))
|
|
{
|
|
util_out_print("Identifier expected, found !AD", TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
code = lower_to_upper_table[ident[0]];
|
|
if (idlen > 1)
|
|
{
|
|
if (((code != 'Y') && (code != 'Z')) || (ident[0] != ident[idlen - 1]))
|
|
{
|
|
util_out_print("User-defined pattern code (!AD) not delimited by Y or Z",
|
|
TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
if (idlen > PAT_YZMAXLEN)
|
|
{
|
|
util_out_print("Length of pattern code name (!AD) longer than maximum !UL",
|
|
TRUE, idlen, ident, PAT_YZMAXLEN);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
newYZnum++;
|
|
if (newYZnum >= PAT_YZMAXNUM)
|
|
{
|
|
util_out_print("Number of user-defined patcodes exceeds maximum (!UL)",
|
|
TRUE, PAT_YZMAXNUM);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
newYZlen[newYZnum] = idlen;
|
|
memcpy(newYZnam[newYZnum], ident, idlen);
|
|
code = newYZnum + 'Y';
|
|
util_out_print("WARNING: Pattern code !AD not yet implemented", TRUE, idlen, ident);
|
|
} else
|
|
{
|
|
if (code > 'X')
|
|
{
|
|
util_out_print("Invalid pattern letter (!AD)", TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
if ((code == 'E') || (code == 'A'))
|
|
{
|
|
util_out_print("Attempt to redefine pattern code !AD", TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
}
|
|
code = code - 'A';
|
|
if (T_NL != (token = pat_lex()))
|
|
{
|
|
util_out_print("Unrecognized text at end of line", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
/* Process character list setting the code's flag into the typemask */
|
|
if (T_NUMBER == token)
|
|
{
|
|
if (number >= max_patents)
|
|
{
|
|
util_out_print("Character code greater than !UL encountered (!UL)",
|
|
TRUE, max_patents - 1, number);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
newtable[number] |= mapbit[code];
|
|
while (',' == (token = pat_lex()))
|
|
{
|
|
if (T_NUMBER != (token = pat_lex()))
|
|
{
|
|
util_out_print("Numeric character code expected, found !AD",
|
|
TRUE, idlen, ident);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error does not return */
|
|
}
|
|
if (number >= max_patents)
|
|
{
|
|
util_out_print("Character code greater than !UL encountered (!UL)",
|
|
TRUE, max_patents - 1, number);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error does not return */
|
|
}
|
|
newtable[number] |= mapbit[code];
|
|
}
|
|
if (T_NL != token)
|
|
{
|
|
util_out_print("Unrecognized text at end of line", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
}
|
|
}
|
|
for (patp = &pattern_list; (*patp) ; patp = &(*patp)->flink)
|
|
{
|
|
cmp = patcmp(newtabnam, (uchar_ptr_t)(*patp)->name);
|
|
if (0 == cmp)
|
|
{ /* don't read in same table name twice */
|
|
util_out_print("Cannot load table !AD twice", TRUE, newnamlen, newtabnam);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
} else if (cmp < 0)
|
|
break;
|
|
}
|
|
newpat = (pattern *) malloc(SIZEOF(pattern) + newnamlen);
|
|
newpat->flink = (*patp);
|
|
newpat->namlen = newnamlen;
|
|
memcpy(newpat->name, newtabnam, newnamlen + 1);
|
|
newpat->typemask = (uint4 *) malloc(max_patents * SIZEOF(typemask[0]));
|
|
memcpy(newpat->typemask, newtable, max_patents * SIZEOF(typemask[0]));
|
|
newpat->patYZnam = (unsigned char *) malloc(SIZEOF(newYZnam));
|
|
memcpy(newpat->patYZnam, newYZnam, SIZEOF(newYZnam));
|
|
newpat->patYZlen = (int *) malloc(SIZEOF(newYZlen));
|
|
memcpy(newpat->patYZlen, newYZlen, SIZEOF(newYZlen));
|
|
newpat->patYZnum = newYZnum;
|
|
(*patp) = newpat;
|
|
}
|
|
if (K_PATEND != token)
|
|
{
|
|
util_out_print("End of definition marker (PATEND) expected", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
while (T_NL == (token = pat_lex()))
|
|
;
|
|
if (T_EOF != token)
|
|
{
|
|
util_out_print("Unrecognized text following end of definitions", TRUE);
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
}
|
|
close_patfile();
|
|
return 1;
|
|
} else
|
|
pattab_error(name_len, file_name, pat_linenum); /* error trap does not return */
|
|
|
|
return -1; /* This will never get executed, added to make compiler happy */
|
|
}
|
|
|
|
static int open_patfile(int name_len, char *file_name)
|
|
{
|
|
int status;
|
|
unsigned char *name_copy;
|
|
|
|
# ifdef VMS
|
|
fab = cc$rms_fab;
|
|
fab.fab$l_fna = file_name;
|
|
fab.fab$b_fns = name_len;
|
|
status = sys$open(&fab);
|
|
if (!(status & 1))
|
|
return 0;
|
|
rab = cc$rms_rab;
|
|
rab.rab$l_fab = &fab;
|
|
rab.rab$l_ubf = patline;
|
|
rab.rab$w_usz = SIZEOF(patline);
|
|
status = sys$connect(&rab);
|
|
if (RMS$_NORMAL != status)
|
|
return 0;
|
|
# else
|
|
name_copy = malloc(name_len + 1);
|
|
memcpy(name_copy, file_name, name_len);
|
|
name_copy[name_len] = '\0';
|
|
patfile = Fopen((const char *)name_copy, "r");
|
|
free(name_copy);
|
|
if (NULL == patfile)
|
|
return 0;
|
|
# endif
|
|
if (getaline())
|
|
ch = patline;
|
|
return 1;
|
|
}
|
|
|
|
static int pat_lex(void)
|
|
{
|
|
int continuation = 0;
|
|
char *id;
|
|
|
|
if (NULL == ch)
|
|
return T_EOF; /* EOF already seen */
|
|
|
|
/* process whitespace */
|
|
skip_whitespace:
|
|
while ((' ' >= *ch) || (';' == *ch))
|
|
{
|
|
if (('\n' == *ch) || (';' == *ch))
|
|
{
|
|
ch = getaline() ? patline : NULL;
|
|
pat_linenum++;
|
|
if (!continuation)
|
|
return T_NL;
|
|
continuation = 0;
|
|
} else
|
|
ch++;
|
|
}
|
|
if (continuation)
|
|
return continuation;
|
|
/* process lexeme */
|
|
switch (*ch) {
|
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
|
|
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
|
|
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
|
|
case 'Y': case 'Z':
|
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
|
|
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
|
|
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
|
|
case 'y': case 'z':
|
|
id = (char *)ident;
|
|
idlen = 0;
|
|
do {
|
|
*id++ = *ch++;
|
|
idlen++;
|
|
} while (typemask[*ch] & (PATM_A | PATM_N));
|
|
*id++ = '\0';
|
|
if (patcmp(ident, (uchar_ptr_t)"PATCODE") == 0)
|
|
return K_PATCODE;
|
|
if (patcmp(ident, (uchar_ptr_t)"PATTABLE") == 0)
|
|
return K_PATTABLE;
|
|
if (patcmp(ident, (uchar_ptr_t)"PATSTART") == 0)
|
|
return K_PATSTART;
|
|
if (patcmp(ident, (uchar_ptr_t)"PATEND") == 0)
|
|
return K_PATEND;
|
|
return T_IDENT;
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
number = *ch++ - '0';
|
|
while (typemask[*ch] & PATM_N)
|
|
number = 10 * number + *ch++ - '0';
|
|
return T_NUMBER;
|
|
break;
|
|
case '-':
|
|
continuation = '-';
|
|
ch++;
|
|
goto skip_whitespace;
|
|
default:
|
|
return *ch++;
|
|
}
|
|
}
|
|
|
|
static int patcmp(unsigned char *str1,unsigned char *str2)
|
|
{
|
|
int cmp;
|
|
|
|
while ('\0' != *str2)
|
|
{
|
|
cmp = lower_to_upper_table[*str1++] - lower_to_upper_table[*str2++];
|
|
if (0 != cmp)
|
|
return cmp;
|
|
}
|
|
return *str1;
|
|
}
|
|
|
|
static void pattab_error(int name_len,char *file_name,int linenum)
|
|
{
|
|
error_def (ERR_PATTABSYNTAX);
|
|
|
|
close_patfile();
|
|
rts_error(VARLSTCNT(5) ERR_PATTABSYNTAX, 3, name_len, file_name, linenum);
|
|
}
|
|
|
|
/* Make the named pattern table the active one.
 *
 * table_name	- name of the table to activate; an empty name selects the table called "M".
 *		  Names are matched case-insensitively (patcmp).
 *
 * On a match, pattern_typemask and curr_pattern are switched to that table and pat_allmaskbits is
 * recomputed from its typemask. Returns TRUE on a match, FALSE when the name is longer than
 * MAXPATNAM or no table with that name has been loaded (the active table is then left unchanged).
 */
int setpattab(mstr *table_name)
{
	int		idx;
	pattern		*table;
	unsigned char	ptnam[MAXPATNAM + 1];

	if (table_name->len > MAXPATNAM)
		return FALSE;
	/* Build a NUL-terminated copy of the requested name for patcmp() */
	if (0 == table_name->len)
	{	/* Default table name */
		ptnam[0] = 'M';
		ptnam[1] = 0;
	} else
	{
		memcpy(ptnam, table_name->addr, table_name->len);
		ptnam[table_name->len] = '\0';
	}
	for (table = pattern_list; NULL != table; table = table->flink)
	{
		if (0 != patcmp(ptnam, (unsigned char *)(table->name)))
			continue;
		curr_pattern = table;
		pattern_typemask = table->typemask;
		/* reset pat_allmaskbits to correspond to the currently active pattern_typemask */
		pat_allmaskbits = 0;
		for (idx = 0; idx < max_patents; idx++)
			pat_allmaskbits |= pattern_typemask[idx];
		return TRUE;
	}
	return FALSE;
}
|