/****************************************************************
 *								*
 *	Copyright 2001, 2009 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license.  If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/

/* This facility loads a pattern table from the file specified.
   Returns success as a Boolean.  A sample pattern table definition
   file follows:

	PATSTART
	  PATTABLE EDM
	    PATCODE c
		0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,-
		24,25,26,27,28,29,30,31,127,128,129,130,131,132,133,134,135,136,-
		137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,-
		153,154,155,156,157,158,159,255
	    PATCODE n
		48,49,50,51,52,53,54,55,56,57
	    PATCODE u
		65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,-
		86,87,88,89,90,192,193,194,195,196,197,198,199,200,201,202,203,-
		204,205,206,207,209,210,211,212,213,214,215,216,217,218,219,220,-
		221
	    PATCODE K
		66,67,68,70,71,72,74,75,76,77,78,80,81,82,83,84,86,87,88,89,90,-
		98,99,100,102,103,104,106,107,108,109,110,112,113,114,115,116,-
		118,119,120,121,122
	    PATCODE l
		97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,-
		114,115,116,117,118,119,120,121,122,170,186,223,224,225,226,227,-
		228,229,230,231,232,233,234,235,236,237,238,239,241,242,243,244,-
		245,246,247,248,249,250,251,252,253
	    PATCODE p
		32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,58,59,60,61,62,-
		63,64,91,92,93,94,95,96,123,124,125,126,160,161,162,163,164,165,-
		166,167,168,169,171,172,173,174,175,176,177,178,179,180,181,182,-
		183,184,185,187,188,189,190,191,208,222,240,254
	    PATCODE V
		65,69,73,79,85,89,-
		97,101,105,111,117,121
	PATEND

   Note that this table does not include a definition for pattern
   codes A and E.  A is implicitly defined as the union of L and U,
   and E is implicitly defined as the union of all other classes.

   A second sample, exercising case-insensitive keywords, comments
   (introduced by ';') and '-' line continuation:

	; This is a test of the GT.M/I18N user-definable pattern-
	; match table definition.
	;
	patsTaRt
	  PattaBLE Example
	    PATCODE A	; WARNING: patcodes A and E cannot be re-defined
		42, 46, 43, 75 -	; comments after continuation
		, 5, -
		63, 91, 92, 93		; comments at end of directive
	    PATcode u	; This is an explicit U code definition
		92, 127, 128, 255
	    PaTcOdE V	; GT.M specific user-defined code
		102, 104, 109, 121
	    patcode YcntY
		65, 69, 73, 79, 85	; ANSI user-defined patcode
	PaTeNd
*/

#include "mdef.h"

#include "gtm_string.h"

#ifdef VMS
/* NOTE(review): the copy of this file under review carried four bare "#include" directives here (the
 * header names were lost).  The names below are a best-effort reconstruction (<rms.h> is required by the
 * FAB/RAB/RMS$_ usage in this module; the other three are presumed) - confirm against the VMS build.
 */
#include <rms.h>
#include <ssdef.h>
#include <lnmdef.h>
#include <descrip.h>
#include "vmsdtype.h"
#else
#include "gtm_stdio.h"
#endif

#include "patcode.h"
#include "iosp.h"
#include "io.h"
#ifdef UNIX
#include "eintr_wrappers.h"
#endif
#include "util.h"
#include "trans_log_name.h"
#include "gtm_logicals.h"

#define	MAXPATNAM	256
#define	MAX_FILE	256
/* pat_lex() stops accumulating decimal digits once "number" reaches this value, so that the
 * "10 * number + digit" step can never exceed 999,999,999 (which fits in a 32-bit int).
 */
#define	PAT_NUM_LEX_CAP	100000000

/* Token codes returned by pat_lex(); values below 256 are literal characters */
enum {T_EOF = 256, T_NL, T_SYNTAX, T_NUMBER, T_IDENT, K_PATSTART, K_PATEND, K_PATTABLE, K_PATCODE};

GBLREF	uint4		mapbit[];
GBLREF	uint4		pat_allmaskbits;
LITREF	uint4		typemask[PATENTS];
GBLREF	uint4		*pattern_typemask;
GBLREF	pattern		*pattern_list;
GBLREF	pattern		*curr_pattern;
GBLREF	pattern		mumps_pattern;
LITREF	unsigned char	lower_to_upper_table[];

static	int		pat_linenum = 0;		/* line number reported by pattab_error() */
static	int		token;				/* most recent token returned by pat_lex() */
static	unsigned char	ident[MAXPATNAM + 1];		/* text of the most recent identifier token */
static	int		idlen;				/* length of ident (excluding the terminating NUL) */
static	int		number, max_patents;		/* value of last T_NUMBER; number of typemask entries in use */
static	char		*ch = NULL;			/* lexer cursor into patline; NULL once EOF has been seen */
static	char		patline[MAXPATNAM + 2];		/* current input line */

#ifdef VMS
static	struct FAB	fab;
static	struct RAB	rab;
#else
static	FILE		*patfile;
#endif

#ifdef DEBUG
void dump_tables(void);
#endif

static	void	close_patfile(void);
static	int	getaline(void);
static	int	open_patfile(int name_len, char *file_name);
static	int	pat_lex(void);
static	int	patcmp(unsigned char *str1, unsigned char *str2);
static	void	pattab_error(int name_len, char *file_name, int linenum);

/* Close the pattern definition file opened by open_patfile(). */
static void close_patfile(void)
{
#	ifdef VMS
	sys$close(&fab);
#	else
	int	fclose_res;

	FCLOSE(patfile, fclose_res);
#	endif
}

#ifdef DEBUG
/* Print the name and every typemask entry of each pattern table on pattern_list. */
void dump_tables(void)
{
	int	mx;
	char	mout;
	pattern	**patp;

	for (patp = &pattern_list; NULL != *patp; patp = &(*patp)->flink)
	{
		util_out_print("!/Pattern Table \"!AD\":!/", TRUE, LEN_AND_STR((*patp)->name));
		for (mx = 0; mx < max_patents; mx++)
		{
			if (mx >= 32 && mx < 127)
			{	/* for codes 32..126 also show the character itself */
				mout = mx;
				util_out_print("!3UL:  !XL  ('!AD')!/", TRUE, mx, (*patp)->typemask[mx], 1, &mout);
			} else
			{
				util_out_print("!3UL:  !XL!/", TRUE, mx, (*patp)->typemask[mx]);
			}
		}
	}
}
#endif

/* Read the next line of the pattern definition file into patline.
 * Returns 1 if a line was read, 0 at end of file (UNIX: also when FGETS returns NULL for any other reason).
 */
static int getaline(void)
{
#	ifdef VMS
	int	status;

	status = sys$get(&rab);
	if (RMS$_EOF == status)
		return 0;
	/* Terminate the record the same way a UNIX text line is terminated */
	patline[rab.rab$w_rsz] = '\n';
	patline[rab.rab$w_rsz + 1] = '\0';
#	else
	char	*fgets_res;

	if (NULL == FGETS(patline, SIZEOF(patline), patfile, fgets_res))
		return 0;
#	endif
	return 1;
}

/* Return (through outname) the name of the currently active pattern table.  Always returns 1. */
int getpattabnam(mstr *outname)
{
	outname->addr = curr_pattern->name;
	outname->len = curr_pattern->namlen;
	return 1;
}

/* Set up the built-in pattern table as the active one, then load the pattern definition file named by
 * the PAT_FILE logical/environment name and activate the table named by PAT_TABLE.
 * Returns 0 if either name fails to translate or the file cannot be loaded, else the result of setpattab().
 */
int initialize_pattern_table(void)
{
	char	buffer[MAX_TRANS_NAME_LEN];
	int	status, letter;
	mstr	patname, transnam;
	static	MSTR_CONST(pat_file, PAT_FILE);
	static	MSTR_CONST(pat_table, PAT_TABLE);

	/* Initialize the pattern/typemask table size. Note that in UTF-8 mode, we
	 * only use the lower half of the table (0 - 127). Although we do not extend
	 * user-defined patcodes for multi-byte characters, we still need to allow
	 * user defined patcodes for the entire ASCII charset (0 - 127) in UTF-8 mode.
	 */
	max_patents = (gtm_utf8_mode ? PATENTS_UTF8 : PATENTS);
	/* Initialize pattern/typemask structures and pat_allmaskbits for default typemask */
	curr_pattern = pattern_list = &mumps_pattern;
	pattern_typemask = mumps_pattern.typemask = (uint4 *)&(typemask[0]);
	for (pat_allmaskbits = 0, letter = 0; letter < max_patents; letter++)
		pat_allmaskbits |= pattern_typemask[letter];	/* used in do_patfixed/do_pattern */
	/* Locate default pattern file and load it. */
	status = TRANS_LOG_NAME(&pat_file, &transnam, buffer, SIZEOF(buffer), do_sendmsg_on_log2long);
	if (SS_NORMAL != status)
		return 0;
	if (!load_pattern_table(transnam.len, transnam.addr))
		return 0;
	/* Establish default pattern table. */
	status = TRANS_LOG_NAME(&pat_table, &transnam, buffer, SIZEOF(buffer), do_sendmsg_on_log2long);
	if (SS_NORMAL != status)
		return 0;
	patname.len = transnam.len;
	patname.addr = transnam.addr;
	return setpattab(&patname);
}

/* Parse the pattern definition file file_name (name_len bytes, not necessarily NUL terminated) and insert
 * each PATTABLE it defines into pattern_list, keeping the list ordered by table name (as compared by patcmp).
 * Returns 0 if the file cannot be opened and 1 on success.  Any syntax error is reported through
 * pattab_error(), which does not return.
 */
int load_pattern_table(int name_len, char *file_name)
{
	unsigned char	newtabnam[MAXPATNAM + 1], newYZnam[PAT_YZMAXNUM][PAT_YZMAXLEN];
	int		code, cmp, cnt, newtable[PATENTS], newnamlen, newYZlen[PAT_YZMAXNUM];
	int		newYZnum = -1;	/* number of ANSI user-defined patcodes */
	pattern		*newpat, **patp;

	if (!open_patfile(name_len, file_name))
		return 0;
	pat_linenum = 1;
	while (T_NL == (token = pat_lex()))
		;
	if (K_PATSTART == token)
	{
		if (T_NL != (token = pat_lex()))
		{
			util_out_print("Unrecognized text at end of line", TRUE);
			pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
		}
		while (T_NL == (token = pat_lex()))
			;
		while (K_PATTABLE == token)
		{	/* Set up a pattern table record. */
			if (T_IDENT != (token = pat_lex()))
			{
				util_out_print("Identifier expected, found !AD", TRUE, idlen, ident);
				pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
			}
			newnamlen = idlen;
			memcpy(newtabnam, ident, newnamlen + 1);	/* "+ 1" copies the terminating NUL too */
			if (T_NL != (token = pat_lex()))
			{
				util_out_print("Unrecognized text at end of line", TRUE);
				pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
			}
			while (T_NL == (token = pat_lex()))
				;
			/* Process PATCODE directives */
			memset(&newtable[0], 0, max_patents * SIZEOF(newtable[0]));
			for (cnt = 0; cnt < PAT_YZMAXNUM; cnt++)
				newYZlen[cnt] = 0;
			newYZnum = -1;
			while (K_PATCODE == token)
			{
				if (T_IDENT != (token = pat_lex()))
				{
					util_out_print("Identifier expected, found !AD", TRUE, idlen, ident);
					pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
				}
				code = lower_to_upper_table[ident[0]];
				if (idlen > 1)
				{	/* multi-character name: must begin and end with the same Y or Z delimiter */
					if (((code != 'Y') && (code != 'Z')) || (ident[0] != ident[idlen - 1]))
					{
						util_out_print("User-defined pattern code (!AD) not delimited by Y or Z",
							TRUE, idlen, ident);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					if (idlen > PAT_YZMAXLEN)
					{
						util_out_print("Length of pattern code name (!AD) longer than maximum !UL",
							TRUE, idlen, ident, PAT_YZMAXLEN);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					newYZnum++;
					if (newYZnum >= PAT_YZMAXNUM)
					{
						util_out_print("Number of user-defined patcodes exceeds maximum (!UL)",
							TRUE, PAT_YZMAXNUM);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					newYZlen[newYZnum] = idlen;
					memcpy(newYZnam[newYZnum], ident, idlen);
					code = newYZnum + 'Y';	/* user-defined codes occupy the slots from 'Y' upward */
					util_out_print("WARNING: Pattern code !AD not yet implemented", TRUE, idlen, ident);
				} else
				{
					if (code > 'X')
					{
						util_out_print("Invalid pattern letter (!AD)", TRUE, idlen, ident);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					if ((code == 'E') || (code == 'A'))
					{
						util_out_print("Attempt to redefine pattern code !AD", TRUE, idlen, ident);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
				}
				code = code - 'A';	/* convert to an index into mapbit[] */
				if (T_NL != (token = pat_lex()))
				{
					util_out_print("Unrecognized text at end of line", TRUE);
					pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
				}
				while (T_NL == (token = pat_lex()))
					;
				/* Process character list setting the code's flag into the typemask */
				if (T_NUMBER == token)
				{
					if (number >= max_patents)
					{
						util_out_print("Character code greater than !UL encountered (!UL)",
							TRUE, max_patents - 1, number);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					newtable[number] |= mapbit[code];
					while (',' == (token = pat_lex()))
					{
						if (T_NUMBER != (token = pat_lex()))
						{
							util_out_print("Numeric character code expected, found !AD",
								TRUE, idlen, ident);
							pattab_error(name_len, file_name, pat_linenum);	/* error does not return */
						}
						if (number >= max_patents)
						{
							util_out_print("Character code greater than !UL encountered (!UL)",
								TRUE, max_patents - 1, number);
							pattab_error(name_len, file_name, pat_linenum);	/* error does not return */
						}
						newtable[number] |= mapbit[code];
					}
					if (T_NL != token)
					{
						util_out_print("Unrecognized text at end of line", TRUE);
						pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
					}
					while (T_NL == (token = pat_lex()))
						;
				}
			}
			/* Find the insertion point; the list is kept in patcmp() order */
			for (patp = &pattern_list; (*patp); patp = &(*patp)->flink)
			{
				cmp = patcmp(newtabnam, (uchar_ptr_t)(*patp)->name);
				if (0 == cmp)
				{	/* don't read in same table name twice */
					util_out_print("Cannot load table !AD twice", TRUE, newnamlen, newtabnam);
					pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
				} else if (cmp < 0)
					break;
			}
			newpat = (pattern *) malloc(SIZEOF(pattern) + newnamlen);
			newpat->flink = (*patp);
			newpat->namlen = newnamlen;
			memcpy(newpat->name, newtabnam, newnamlen + 1);
			newpat->typemask = (uint4 *) malloc(max_patents * SIZEOF(typemask[0]));
			memcpy(newpat->typemask, newtable, max_patents * SIZEOF(typemask[0]));
			newpat->patYZnam = (unsigned char *) malloc(SIZEOF(newYZnam));
			memcpy(newpat->patYZnam, newYZnam, SIZEOF(newYZnam));
			newpat->patYZlen = (int *) malloc(SIZEOF(newYZlen));
			memcpy(newpat->patYZlen, newYZlen, SIZEOF(newYZlen));
			newpat->patYZnum = newYZnum;
			(*patp) = newpat;
		}
		if (K_PATEND != token)
		{
			util_out_print("End of definition marker (PATEND) expected", TRUE);
			pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
		}
		while (T_NL == (token = pat_lex()))
			;
		if (T_EOF != token)
		{
			util_out_print("Unrecognized text following end of definitions", TRUE);
			pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
		}
		close_patfile();
		return 1;
	} else
		pattab_error(name_len, file_name, pat_linenum);	/* error trap does not return */
	return -1;	/* This will never get executed, added to make compiler happy */
}

/* Open the pattern definition file and prime the lexer with its first line.
 * Returns 0 if the file cannot be opened, 1 otherwise.  On return "ch" points at the first line, or is
 * NULL if the file is empty (so that pat_lex() reports T_EOF rather than lexing whatever an earlier,
 * possibly aborted, load left behind in patline).
 */
static int open_patfile(int name_len, char *file_name)
{
#	ifdef VMS
	int		status;

	fab = cc$rms_fab;
	fab.fab$l_fna = file_name;
	fab.fab$b_fns = name_len;
	status = sys$open(&fab);
	if (!(status & 1))
		return 0;
	rab = cc$rms_rab;
	rab.rab$l_fab = &fab;
	rab.rab$l_ubf = patline;
	/* getaline() appends '\n' and '\0' after the record, so leave room for those two bytes */
	rab.rab$w_usz = SIZEOF(patline) - 2;
	status = sys$connect(&rab);
	if (RMS$_NORMAL != status)
	{
		sys$close(&fab);	/* do not leave the file open on a failed connect */
		return 0;
	}
#	else
	unsigned char	*name_copy;

	/* file_name is length-delimited; Fopen needs a NUL terminated copy */
	name_copy = malloc(name_len + 1);
	memcpy(name_copy, file_name, name_len);
	name_copy[name_len] = '\0';
	patfile = Fopen((const char *)name_copy, "r");
	free(name_copy);
	if (NULL == patfile)
		return 0;
#	endif
	ch = getaline() ? patline : NULL;
	return 1;
}

/* Return the next token from the pattern definition file.
 *
 * Returns T_EOF at end of input, T_NL at the end of each line (a ';' starts a comment that runs to the end
 * of the line), T_NUMBER for a decimal number (value left in "number"), T_IDENT or one of the K_* keyword
 * codes for an identifier (text left in "ident"/"idlen"; keywords are matched case-insensitively), and the
 * character itself for anything else.  A '-' followed only by whitespace/comment up to the end of the line
 * is a line continuation: the line end is swallowed and lexing resumes on the next line.
 */
static int pat_lex(void)
{
	int	continuation = 0;
	int	digit;
	char	*id;

	if (NULL == ch)
		return T_EOF;	/* EOF already seen */
	/* process whitespace */
skip_whitespace:
	for (;;)
	{
		if ('\0' == *ch)
		{	/* Ran off the end of patline without seeing a newline (final line lacking '\n', or a line
			 * longer than the buffer).  Refill instead of stepping past the terminator; this is not a
			 * line end, so neither T_NL nor a line number increment is appropriate.
			 */
			ch = getaline() ? patline : NULL;
			if (NULL == ch)
				return T_EOF;
			continue;
		}
		if ((' ' < *ch) && (';' != *ch))
			break;		/* start of a lexeme */
		if (('\n' == *ch) || (';' == *ch))
		{
			ch = getaline() ? patline : NULL;
			pat_linenum++;
			if (!continuation)
				return T_NL;
			continuation = 0;
			if (NULL == ch)
				return T_EOF;	/* input ended right after a continuation; nothing left to examine */
		} else
			ch++;
	}
	if (continuation)
		return continuation;	/* '-' followed by text on the same line: hand it back as a plain character */
	/* process lexeme */
	switch (*ch)
	{
	    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
	    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
	    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
	    case 'V': case 'W': case 'X': case 'Y': case 'Z':
	    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
	    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
	    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
	    case 'v': case 'w': case 'x': case 'y': case 'z':
		id = (char *)ident;
		idlen = 0;
		do
		{
			*id++ = *ch++;
			idlen++;
			/* Stop at MAXPATNAM so the terminating NUL always fits in ident[]; index typemask with an
			 * unsigned value so bytes >= 0x80 cannot produce a negative subscript.
			 */
		} while ((MAXPATNAM > idlen) && (typemask[(unsigned char)*ch] & (PATM_A | PATM_N)));
		*id = '\0';
		if (patcmp(ident, (uchar_ptr_t)"PATCODE") == 0)
			return K_PATCODE;
		if (patcmp(ident, (uchar_ptr_t)"PATTABLE") == 0)
			return K_PATTABLE;
		if (patcmp(ident, (uchar_ptr_t)"PATSTART") == 0)
			return K_PATSTART;
		if (patcmp(ident, (uchar_ptr_t)"PATEND") == 0)
			return K_PATEND;
		return T_IDENT;
	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
		number = *ch++ - '0';
		while (typemask[(unsigned char)*ch] & PATM_N)
		{
			digit = *ch++ - '0';
			/* Consume every digit but stop accumulating once the value is already far beyond any
			 * valid character code, so the arithmetic cannot overflow (and wrap negative).
			 */
			if (PAT_NUM_LEX_CAP > number)
				number = (10 * number) + digit;
		}
		return T_NUMBER;
	    case '-':
		continuation = '-';
		ch++;
		goto skip_whitespace;
	    default:
		return *ch++;
	}
}

/* Case-insensitive comparison (through lower_to_upper_table) of str1 against str2.
 * Returns the difference of the first pair of characters that differ while scanning str2; if all of str2
 * matches, returns the next character of str1 (0 when both strings end together, i.e. they are equal).
 */
static int patcmp(unsigned char *str1, unsigned char *str2)
{
	int	cmp;

	while ('\0' != *str2)
	{
		cmp = lower_to_upper_table[*str1++] - lower_to_upper_table[*str2++];
		if (0 != cmp)
			return cmp;
	}
	return *str1;
}

/* Close the pattern definition file and raise ERR_PATTABSYNTAX for the given file name and line number. */
static void pattab_error(int name_len, char *file_name, int linenum)
{
	error_def	(ERR_PATTABSYNTAX);

	close_patfile();
	rts_error(VARLSTCNT(5) ERR_PATTABSYNTAX, 3, name_len, file_name, linenum);
}

/* Make the pattern table called table_name the active one (an empty name selects table "M").
 * Returns TRUE if a table of that name is on pattern_list, FALSE if not or if the name is longer
 * than MAXPATNAM.
 */
int setpattab(mstr *table_name)
{
	int		letter;
	pattern		**patp;
	unsigned char	ptnam[MAXPATNAM + 1];

	if (table_name->len <= MAXPATNAM)
	{	/* null-terminate the pattern table name. */
		if (table_name->len)
		{
			memcpy(ptnam, table_name->addr, table_name->len);
			ptnam[table_name->len] = '\0';
		} else
		{	/* Default table name */
			ptnam[0] = 'M';
			ptnam[1] = 0;
		}
		for (patp = &pattern_list; NULL != *patp; patp = &(*patp)->flink)
		{
			if (0 == patcmp(ptnam, (unsigned char *)((*patp)->name)))
			{
				pattern_typemask = (*patp)->typemask;
				curr_pattern = (*patp);
				/* reset pat_allmaskbits to correspond to the currently active pattern_typemask */
				for (pat_allmaskbits = 0, letter = 0; letter < max_patents; letter++)
					pat_allmaskbits |= pattern_typemask[letter];
				return TRUE;
			}
		}
	}
	return FALSE;
}