fis-gtm/sr_port/add_atom.c

113 lines
3.7 KiB
C

/****************************************************************
* *
* Copyright 2001, 2009 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_string.h"
#include "copy.h"
#include "patcode.h"
#include "min_max.h"
/* Part of the MUMPS compiler: appends one pattern atom to the compiled pattern that is being assembled.
 * If the new atom can be merged ("compressed") into the atom emitted just before it, pat_compress() gets the
 * chance to do so, and only the repeat counts of that previous atom are adjusted.
 *
 * Parameters (as used by the code below):
 *	count			in/out	number of atoms emitted so far
 *	pattern_mask		in	mask of the atom to add; the PATM_STRLIT and PATM_ALT bits are inspected here
 *	strlit_buff		in	string literal descriptor; read when PATM_STRLIT is set, and handed to pat_compress()
 *	infinite		in	stored in *last_infinite_ptr for a newly emitted atom; also handed to pat_compress()
 *	min, max, size		in/out	address the slot for the NEW atom; slot [-1] is treated as the previous atom's
 *	total_min, total_max	in/out	running totals, each capped at PAT_MAX_REPEAT
 *	lower_bound, upper_bound in	repeat bounds of the atom
 *	altmin, altmax		in	factors applied to the bounds when PATM_ALT is set
 *	last_infinite_ptr	in/out	"infinite" flag of the most recently emitted atom
 *	fstchar_ptr		in	start of the output area (only used to measure the emitted length)
 *	outchar_ptr		in/out	current write position in the output area
 *	lastpatptr_ptr		in/out	location of the most recently emitted atom's mask
 *
 * Returns TRUE when the atom was added, merged or deliberately ignored; FALSE when emitting it would make the
 * output longer than MAX_PATTERN_LENGTH. Note that on a FALSE return some of the in/out arguments have already
 * been updated.
 */
boolean_t add_atom(int *count,
	uint4 pattern_mask,
	pat_strlit *strlit_buff,
	boolean_t infinite,
	int *min,
	int *max,
	int *size,
	int *total_min,
	int *total_max,
	int lower_bound,
	int upper_bound,
	int altmin,
	int altmax,
	boolean_t *last_infinite_ptr,
	uint4 **fstchar_ptr,
	uint4 **outchar_ptr,
	uint4 **lastpatptr_ptr)
{
	uint4		*mask_slot;
	gtm_uint64_t	scratch;	/* work variable required by BOUND_MULTIPLY */
	int4		lit_bytes;

	if (pattern_mask & PATM_STRLIT)
	{	/* Consider a pattern such as xxx?1N5.7""2A . An unlimited number of empty strings fits between any two
		 * characters, so an atom that repeats the literal "" contributes nothing and is dropped - but only when
		 * at least one other atom has been emitted already, i.e. when it is not the sole atom of the pattern.
		 */
		if ((0 == strlit_buff->bytelen) && (0 != *count))
			return TRUE;
	}
	if ((0 != *count) && (0 == size[-1]))
	{	/* The atom emitted before this one was an n.m"" : discard it. The output then ends with these four
		 * values: PATM_STRLIT (pattern mask), 0 (bytelen), 0 (charlen), flags (ASCII and no BADCHAR).
		 */
		assert(3 == PAT_STRLIT_PADDING);
		assert(PATM_STRLIT == *(*outchar_ptr - (PAT_STRLIT_PADDING + 1)));
		assert(0 == (*outchar_ptr)[-3]);				/* bytelen */
		assert(0 == (*outchar_ptr)[-2]);				/* charlen */
		assert(!((*outchar_ptr)[-1] & PATM_STRLIT_NONASCII));		/* flags - ascii */
		assert(!((*outchar_ptr)[-1] & PATM_STRLIT_BADCHAR));		/* flags - no badchar */
		*outchar_ptr -= (PAT_STRLIT_PADDING + 1);
		--(*count);
		assert(0 == *count);
		/* Step back so that the new atom reuses the slots of the discarded one */
		--min;
		--max;
		--size;
	}
	if (pattern_mask & PATM_ALT)
	{	/* For an alternation, scale the repeat bounds by the supplied alternation factors */
		lower_bound = BOUND_MULTIPLY(lower_bound, altmin, scratch);
		upper_bound = BOUND_MULTIPLY(upper_bound, altmax, scratch);
	}
	if ((0 == *count) || !pat_compress(pattern_mask, strlit_buff, infinite, *last_infinite_ptr, *lastpatptr_ptr))
	{	/* No merge with the previous atom: emit a new one */
		*min = MIN(lower_bound, PAT_MAX_REPEAT);
		*max = MIN(upper_bound, PAT_MAX_REPEAT);
		mask_slot = *outchar_ptr;
		*lastpatptr_ptr = mask_slot;
		*last_infinite_ptr = infinite;
		*outchar_ptr += 1;		/* reserve room for the pattern mask */
		if (MAX_PATTERN_LENGTH < (*outchar_ptr - *fstchar_ptr))
			return FALSE;
		if (!(pattern_mask & PATM_ALT) && (pattern_mask & PATM_STRLIT))
		{	/* String literal: the mask is followed by a copy of the literal descriptor */
			lit_bytes = strlit_buff->bytelen;
			*outchar_ptr += DIVIDE_ROUND_UP(lit_bytes, SIZEOF(uint4)) + PAT_STRLIT_PADDING;
			if (MAX_PATTERN_LENGTH < (*outchar_ptr - *fstchar_ptr))
				return FALSE;
			*mask_slot = pattern_mask;
			memcpy(mask_slot + 1, strlit_buff, lit_bytes + PAT_STRLIT_PADDING * SIZEOF(uint4));
			*size = strlit_buff->charlen;
		} else
		{	/* Alternation or pattern-code atom: only the mask is stored and its size is 1 */
			*mask_slot = pattern_mask;
			*size = 1;
		}
		++(*count);
	} else
	{	/* Merged into the previous atom: add the new bounds to that atom's min/max (capped) */
		--min;
		--max;
		--size;
		*min = MIN(*min + lower_bound, PAT_MAX_REPEAT);
		*max = MIN(*max + upper_bound, PAT_MAX_REPEAT);
	}
	/* At this point "size" addresses the atom that was just emitted or merged into; update the capped totals */
	*total_min += BOUND_MULTIPLY(*size, lower_bound, scratch);
	if (PAT_MAX_REPEAT < *total_min)
		*total_min = PAT_MAX_REPEAT;
	*total_max += BOUND_MULTIPLY(*size, upper_bound, scratch);
	if (PAT_MAX_REPEAT < *total_max)
		*total_max = PAT_MAX_REPEAT;
	return TRUE;
}