113 lines
3.7 KiB
C
113 lines
3.7 KiB
C
/****************************************************************
|
|
* *
|
|
* Copyright 2001, 2009 Fidelity Information Services, Inc *
|
|
* *
|
|
* This source code contains the intellectual property *
|
|
* of its copyright holder(s), and is made available *
|
|
* under a license. If you do not know the terms of *
|
|
* the license, please stop and do not read further. *
|
|
* *
|
|
****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
|
|
#include "copy.h"
|
|
#include "patcode.h"
|
|
#include "min_max.h"
|
|
|
|
/* This function is part of the MUMPS compiler. It adds one pattern atom to the string of compiled pattern atoms.
|
|
* If the atom to be added can be "compressed" with the previous one, this function will allow compress() to do so.
|
|
*/
|
|
boolean_t add_atom(int *count,
|
|
uint4 pattern_mask,
|
|
pat_strlit *strlit_buff,
|
|
boolean_t infinite,
|
|
int *min,
|
|
int *max,
|
|
int *size,
|
|
int *total_min,
|
|
int *total_max,
|
|
int lower_bound,
|
|
int upper_bound,
|
|
int altmin,
|
|
int altmax,
|
|
boolean_t *last_infinite_ptr,
|
|
uint4 **fstchar_ptr,
|
|
uint4 **outchar_ptr,
|
|
uint4 **lastpatptr_ptr)
|
|
{
|
|
uint4 *patmaskptr;
|
|
gtm_uint64_t bound;
|
|
int4 bytelen;
|
|
|
|
if ((pattern_mask & PATM_STRLIT) && !strlit_buff->bytelen && *count)
|
|
{ /* A special case is a pattern like xxx?1N5.7""2A . Since there is an infinite number of empty strings between
|
|
* any two characters in a string, a pattern atom that counts repetitions of the fixed string "" can be ignored.
|
|
* That is, such an atom can only be ignored if it is not the only one in the pattern...
|
|
*/
|
|
return TRUE;
|
|
}
|
|
if (*count && !*(size - 1))
|
|
{ /* If the previous atom was an n.m"", it should be removed. In such a case, the last four values
|
|
* in the 'outchar' array are PATM_STRLIT (pattern mask), 0 (bytelen), 0 (charlen), flags (ASCII and no BADCHAR). */
|
|
assert(3 == PAT_STRLIT_PADDING);
|
|
assert(PATM_STRLIT == *(*outchar_ptr - (PAT_STRLIT_PADDING + 1)));
|
|
assert(0 == *(*outchar_ptr - 3)); /* bytelen */
|
|
assert(0 == *(*outchar_ptr - 2)); /* charlen */
|
|
assert(!((*(*outchar_ptr - 1)) & PATM_STRLIT_NONASCII)); /* flags - ascii */
|
|
assert(!((*(*outchar_ptr - 1)) & PATM_STRLIT_BADCHAR)); /* flags - no badchar */
|
|
*outchar_ptr -= (PAT_STRLIT_PADDING + 1);
|
|
(*count)--;
|
|
assert(0 == *count);
|
|
min--;
|
|
max--;
|
|
size--;
|
|
}
|
|
if (pattern_mask & PATM_ALT)
|
|
{
|
|
lower_bound = BOUND_MULTIPLY(lower_bound, altmin, bound);
|
|
upper_bound = BOUND_MULTIPLY(upper_bound, altmax, bound);
|
|
}
|
|
if (*count && pat_compress(pattern_mask, strlit_buff, infinite, *last_infinite_ptr, *lastpatptr_ptr))
|
|
{
|
|
min--;
|
|
max--;
|
|
size--;
|
|
*min = MIN(*min + lower_bound, PAT_MAX_REPEAT);
|
|
*max = MIN(*max + upper_bound, PAT_MAX_REPEAT);
|
|
} else
|
|
{
|
|
*min = MIN(lower_bound, PAT_MAX_REPEAT);
|
|
*max = MIN(upper_bound, PAT_MAX_REPEAT);
|
|
*lastpatptr_ptr = patmaskptr = *outchar_ptr;
|
|
*last_infinite_ptr = infinite;
|
|
(*outchar_ptr)++;
|
|
if (*outchar_ptr - *fstchar_ptr > MAX_PATTERN_LENGTH)
|
|
return FALSE;
|
|
if ((pattern_mask & PATM_ALT) || !(pattern_mask & PATM_STRLIT))
|
|
{
|
|
*patmaskptr++ = pattern_mask;
|
|
*size = 1;
|
|
} else
|
|
{
|
|
bytelen = strlit_buff->bytelen;
|
|
*outchar_ptr += DIVIDE_ROUND_UP(bytelen, SIZEOF(uint4)) + PAT_STRLIT_PADDING;
|
|
if (*outchar_ptr - *fstchar_ptr > MAX_PATTERN_LENGTH)
|
|
return FALSE;
|
|
*patmaskptr++ = pattern_mask;
|
|
memcpy(patmaskptr, strlit_buff, bytelen + PAT_STRLIT_PADDING * SIZEOF(uint4));
|
|
*size = strlit_buff->charlen;
|
|
}
|
|
(*count)++;
|
|
}
|
|
*total_min += BOUND_MULTIPLY(*size, lower_bound, bound);
|
|
if (*total_min > PAT_MAX_REPEAT)
|
|
*total_min = PAT_MAX_REPEAT;
|
|
*total_max += BOUND_MULTIPLY(*size, upper_bound, bound);
|
|
if (*total_max > PAT_MAX_REPEAT)
|
|
*total_max = PAT_MAX_REPEAT;
|
|
return TRUE;
|
|
}
|