324 lines
12 KiB
C
324 lines
12 KiB
C
/****************************************************************
 *								*
 *	Copyright 2006, 2012 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license. If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
|
|
#include "gtm_string.h"
|
|
#include "stringpool.h"
|
|
#include "min_max.h"
|
|
#include "fnpc.h"
|
|
#include "op.h"
|
|
#include "gtm_utf8.h"
|
|
|
|
GBLREF spdesc stringpool;
|
|
GBLREF boolean_t gtm_utf8_mode; /* We are indeed doing the UTF8 thang */
|
|
GBLREF boolean_t badchar_inhibit; /* No BADCHAR errors should be signaled */
|
|
|
|
/* Debug-only instrumentation used by op_setp1() in this file.
 *
 * SETWON / SETWOFF	- set / clear the setp_work flag (op_setp1() brackets its op_fnp1() calls with these).
 * COUNT_EVENT(x)	- bump counter x by one.
 * INCR_COUNT(x, y)	- add y to counter x.
 *
 * Every macro expands to a single parenthesized expression WITHOUT a trailing semicolon so that
 * "MACRO;" is exactly one statement and remains safe inside an unbraced if/else. In non-DEBUG
 * builds all four expand to nothing.
 */
#ifdef DEBUG
GBLREF	boolean_t	setp_work;
GBLREF	int		cs_small;	/* scanned small string brute force */
GBLREF	int		cs_small_pcs;	/* chars scanned by small scan */
# define SETWON			(setp_work = TRUE)
# define SETWOFF		(setp_work = FALSE)
# define COUNT_EVENT(x)		(++(x))
# define INCR_COUNT(x,y)	((x) += (y))
#else
# define SETWON
# define SETWOFF
# define COUNT_EVENT(x)
# define INCR_COUNT(x,y)
#endif
|
|
|
|
error_def(ERR_MAXSTRLEN);
|
|
|
|
/*
|
|
* ----------------------------------------------------------
|
|
* Fast path setpiece when delimiter is one (lit) char replacing
|
|
* a single piece (last is same as first). Unicode flavor.
|
|
*
|
|
* Arguments:
|
|
* src - source mval
|
|
* delim - delimiter char
|
|
* expr - expression string mval
|
|
* ind - index in source mval to be set
|
|
* dst - destination mval where the result is saved.
|
|
*
|
|
* Return:
|
|
* none
|
|
* ----------------------------------------------------------
|
|
*/
|
|
void op_setp1(mval *src, int delim, mval *expr, int ind, mval *dst)
|
|
{
|
|
size_t str_len, delim_cnt;
|
|
int len, pfx_str_len, sfx_start_offset, sfx_str_len, rep_str_len, pfx_scan_offset;
|
|
int dlmlen, cpy_cache_lines, mblen;
|
|
unsigned char *start_sfx, *str_addr, *end_pfx, *end_src, *start_pfx;
|
|
boolean_t do_scan, delim_last_scan, valid_char;
|
|
mval dummymval; /* It's value is not used but is part of the call to op_fnp1() */
|
|
fnpc *cfnpc, *pfnpc;
|
|
delimfmt ldelim;
|
|
DCL_THREADGBL_ACCESS;
|
|
|
|
SETUP_THREADGBL_ACCESS;
|
|
assert(gtm_utf8_mode);
|
|
do_scan = FALSE;
|
|
cpy_cache_lines = -1;
|
|
ldelim.unichar_val = delim;
|
|
if (!UTF8_VALID(ldelim.unibytes_val, (ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val)), dlmlen)
|
|
&& !badchar_inhibit)
|
|
{ /* The delimiter is a bad character so error out if badchar not inhibited */
|
|
UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
|
|
}
|
|
MV_FORCE_STR(expr); /* Expression to put into piece place */
|
|
if (MV_DEFINED(src))
|
|
{
|
|
/* We have 3 possible scenarios:
|
|
* 1) The source string is null. Nothing to do but proceed to building output.
|
|
* 2) If the requested piece is larger than can be cached by op_fnp1, call fnp1
|
|
* for the maximum piece possible, use the cache info to "prime the pump" and
|
|
* then process the rest of the string ourselves.
|
|
* 3) If the requested piece can be obtained from the cache, call op_fnp1 to validate
|
|
* and rebuild the cache if necessary and then retrieve the necessary info from
|
|
* the fnpc cache.
|
|
*/
|
|
MV_FORCE_STR(src); /* Make sure is string prior to length check */
|
|
if (0 == src->str.len)
|
|
{ /* We have a null source string */
|
|
pfx_str_len = sfx_str_len = sfx_start_offset = 0;
|
|
delim_cnt = (0 < ind) ? (size_t)ind - 1 : 0;
|
|
} else if (FNPC_ELEM_MAX >= ind)
|
|
{ /* 3) Best of all possible cases. The op_fnp1 can do most of our work for us
|
|
* and we can preload the cache on the new string to help its subsequent
|
|
* uses along as well.
|
|
*/
|
|
SETWON;
|
|
op_fnp1(src, delim, ind, &dummymval);
|
|
SETWOFF;
|
|
cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
|
|
assert(cfnpc->last_str.addr == src->str.addr);
|
|
assert(cfnpc->last_str.len == src->str.len);
|
|
assert(cfnpc->delim == delim);
|
|
assert(0 < cfnpc->npcs);
|
|
/* Three more scenarios: #1 piece all in cache, #2 piece would be in cache but ran
|
|
* out of text or #3 piece is beyond what can be cached
|
|
*/
|
|
if (cfnpc->npcs >= ind)
|
|
{ /* #1 The piece we want is totally within the cache which is good news */
|
|
pfx_str_len = cfnpc->pstart[ind - 1];
|
|
delim_cnt = 0;
|
|
sfx_start_offset = cfnpc->pstart[ind] - dlmlen; /* Include delimiter */
|
|
rep_str_len = cfnpc->pstart[ind] - cfnpc->pstart[ind - 1] - dlmlen; /* Replace string length */
|
|
sfx_str_len = src->str.len - pfx_str_len - rep_str_len;
|
|
cpy_cache_lines = ind - 1;
|
|
} else
|
|
{ /* #2 The string was too short so the cache does not contain our string. This means
|
|
* that the prefix becomes any text that IS in the cache and we set the delim_cnt
|
|
* to be the number of missing pieces so the delimiters can be put in as part of the
|
|
* prefix when we build the new string.
|
|
*/
|
|
pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
|
|
delim_cnt = (size_t)(ind - cfnpc->npcs);
|
|
sfx_start_offset = 0;
|
|
sfx_str_len = 0;
|
|
cpy_cache_lines = cfnpc->npcs;
|
|
}
|
|
} else
|
|
{ /* 2) We have a element that would not be able to be in the fnpc cache. Go ahead
|
|
* and call op_fnp1 to get cache info up to the maximum and then we will continue
|
|
* the scan on our own.
|
|
*/
|
|
SETWON;
|
|
op_fnp1(src, delim, FNPC_ELEM_MAX, &dummymval);
|
|
SETWOFF;
|
|
cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
|
|
assert(cfnpc->last_str.addr == src->str.addr);
|
|
assert(cfnpc->last_str.len == src->str.len);
|
|
assert(cfnpc->delim == delim);
|
|
assert(0 < cfnpc->npcs);
|
|
if (FNPC_ELEM_MAX > cfnpc->npcs)
|
|
{ /* We ran out of text so the scan is complete. This is basically the same
|
|
* as case #2 above.
|
|
*/
|
|
pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
|
|
delim_cnt = (size_t)(ind - cfnpc->npcs);
|
|
sfx_start_offset = 0;
|
|
sfx_str_len = 0;
|
|
cpy_cache_lines = cfnpc->npcs;
|
|
} else
|
|
{ /* We have a case where the piece we want cannot be kept in cache. In the special
|
|
* case where there is no more text to handle, we don't need to scan further. Otherwise
|
|
* we prime the pump and continue the scan where the cache left off.
|
|
*/
|
|
if ((pfx_scan_offset = cfnpc->pstart[FNPC_ELEM_MAX]) < src->str.len) /* Note assignment */
|
|
/* Normal case where we prime the pump */
|
|
do_scan = TRUE;
|
|
else
|
|
{ /* Special case -- no more text to scan */
|
|
pfx_str_len = cfnpc->pstart[FNPC_ELEM_MAX] - dlmlen;
|
|
sfx_start_offset = 0;
|
|
sfx_str_len = 0;
|
|
}
|
|
delim_cnt = (size_t)ind - FNPC_ELEM_MAX;
|
|
cpy_cache_lines = FNPC_ELEM_MAX;
|
|
}
|
|
}
|
|
} else
|
|
{ /* Source is not defined -- treat as a null string */
|
|
pfx_str_len = sfx_str_len = sfx_start_offset = 0;
|
|
delim_cnt = (size_t)ind - 1;
|
|
}
|
|
/* If we have been forced to do our own scan, do that here. Note the variable pfx_scan_offset has been
|
|
* set to where the scan should begin in the src string and delim_cnt has been set to how many delimiters
|
|
* still need to be processed.
|
|
*/
|
|
if (do_scan)
|
|
{ /* Scan the line isolating prefix piece, and end of the
|
|
* piece being replaced
|
|
*/
|
|
COUNT_EVENT(cs_small);
|
|
end_pfx = start_sfx = (unsigned char *)src->str.addr + pfx_scan_offset;
|
|
end_src = (unsigned char *)src->str.addr + src->str.len;
|
|
/* The compiler would unroll this loop this way anyway but we want to
|
|
* adjust the start_sfx pointer after the loop but only if we have gone
|
|
* into it at least once.
|
|
*/
|
|
if ((0 < delim_cnt) && (start_sfx < end_src))
|
|
{
|
|
do
|
|
{
|
|
end_pfx = start_sfx;
|
|
delim_last_scan = FALSE; /* Whether delimiter is last character scanned */
|
|
while (start_sfx < end_src)
|
|
{
|
|
valid_char = UTF8_VALID(start_sfx, end_src, mblen); /* Length of next char */
|
|
if (!valid_char)
|
|
{ /* Next character is not valid unicode. If badchar error is not inhibited,
|
|
* signal it now. If it is inhibited, just treat the character as a single
|
|
* character and continue.
|
|
*/
|
|
if (!badchar_inhibit)
|
|
utf8_badchar(0, start_sfx, end_src, 0, NULL);
|
|
assert(1 == mblen);
|
|
}
|
|
/* Getting mblen first allows us to do quick length compare before the
|
|
* heavier weight memcmp call.
|
|
*/
|
|
assert(0 < mblen);
|
|
if (mblen == dlmlen && 0 == memcmp(start_sfx, ldelim.unibytes_val, dlmlen))
|
|
{
|
|
delim_last_scan = TRUE;
|
|
break;
|
|
}
|
|
/* Increment ptrs by size of found char */
|
|
start_sfx += mblen;
|
|
}
|
|
start_sfx += dlmlen;
|
|
delim_cnt--;
|
|
} while ((0 < delim_cnt) && (start_sfx < end_src));
|
|
/* We have to backup up the suffix start pointer except under the condition
|
|
* that the last character in the buffer is the last delimiter we were looking
|
|
* for.
|
|
*/
|
|
if ((0 == delim_cnt) || (start_sfx < end_src) || !delim_last_scan)
|
|
start_sfx -= dlmlen; /* Back up suffix to include delimiter char */
|
|
/* If we scanned to the end (no text left) and still have delimiters to
|
|
* find, the entire src text should be part of the prefix
|
|
*/
|
|
if ((start_sfx >= end_src) && (0 < delim_cnt))
|
|
{
|
|
end_pfx = start_sfx;
|
|
if (delim_last_scan) /* if last char was delim, reduce delim cnt */
|
|
--delim_cnt;
|
|
}
|
|
} else
|
|
{
|
|
/* If not doing any token finding, then this count becomes the number
|
|
* of tokens to output. Adjust accordingly.
|
|
*/
|
|
if (0 < delim_cnt)
|
|
--delim_cnt;
|
|
}
|
|
INCR_COUNT(cs_small_pcs, (int)((size_t)ind - delim_cnt));
|
|
/* Now having the following situation:
|
|
* end_pfx -> end of the prefix piece including delimiter
|
|
* start_sfx -> start of suffix piece (with delimiter) or = end_pfx/src->str.addr if none
|
|
*/
|
|
pfx_str_len = (int)(end_pfx - (unsigned char *)src->str.addr);
|
|
if (0 > pfx_str_len)
|
|
pfx_str_len = 0;
|
|
sfx_start_offset = (int)(start_sfx - (unsigned char *)src->str.addr);
|
|
sfx_str_len = src->str.len - sfx_start_offset;
|
|
if (0 > sfx_str_len)
|
|
sfx_str_len = 0;
|
|
}
|
|
/* Calculate total string len. delim_cnt has needed padding delimiters for null fields */
|
|
str_len = (size_t)expr->str.len + (size_t)pfx_str_len + (delim_cnt * (size_t)dlmlen) + (size_t)sfx_str_len;
|
|
if (MAX_STRLEN < str_len)
|
|
rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
|
|
ENSURE_STP_FREE_SPACE((int)str_len);
|
|
str_addr = stringpool.free;
|
|
start_pfx = (unsigned char *)src->str.addr;
|
|
/* copy prefix */
|
|
if (0 < pfx_str_len)
|
|
{
|
|
memcpy(str_addr, src->str.addr, pfx_str_len);
|
|
str_addr += pfx_str_len;
|
|
}
|
|
/* copy delimiters */
|
|
while (0 < delim_cnt--)
|
|
{
|
|
memcpy(str_addr, ldelim.unibytes_val, dlmlen);
|
|
str_addr += dlmlen;
|
|
}
|
|
/* copy expression */
|
|
if (0 < expr->str.len)
|
|
{
|
|
memcpy(str_addr, expr->str.addr, expr->str.len);
|
|
str_addr += expr->str.len;
|
|
}
|
|
/* copy suffix */
|
|
if (0 < sfx_str_len)
|
|
{
|
|
memcpy(str_addr, start_pfx + sfx_start_offset, sfx_str_len);
|
|
str_addr += sfx_str_len;
|
|
}
|
|
assert((str_addr - stringpool.free) == str_len);
|
|
dst->mvtype = MV_STR;
|
|
dst->str.len = INTCAST(str_addr - stringpool.free);
|
|
dst->str.addr = (char *)stringpool.free;
|
|
stringpool.free = str_addr;
|
|
/* If available, update the cache information for this newly created mval to hopefully
|
|
* give it a head start on its next usage. Note that we can only copy over the cache info
|
|
* for the prefix. We cannot include information for the 'expression' except where it starts
|
|
* because the expression could itself contain delimiters that would be found on a rescan.
|
|
*/
|
|
if (0 < cpy_cache_lines)
|
|
{
|
|
pfnpc = cfnpc; /* pointer for src mval's cache */
|
|
do
|
|
{
|
|
cfnpc = (TREF(fnpca)).fnpcsteal; /* Next cache element to steal */
|
|
if ((TREF(fnpca)).fnpcmax < cfnpc)
|
|
cfnpc = &(TREF(fnpca)).fnpcs[0];
|
|
(TREF(fnpca)).fnpcsteal = cfnpc + 1; /* -> next element to steal */
|
|
} while (cfnpc == pfnpc); /* Make sure we don't step on ourselves */
|
|
cfnpc->last_str = dst->str; /* Save validation info */
|
|
cfnpc->delim = delim;
|
|
cfnpc->npcs = cpy_cache_lines;
|
|
dst->fnpc_indx = cfnpc->indx + 1; /* Save where we are putting this element
|
|
* (1 based index in mval so 0 isn't so common)
|
|
*/
|
|
memcpy(&cfnpc->pstart[0], &pfnpc->pstart[0], (cfnpc->npcs + 1) * SIZEOF(unsigned int));
|
|
} else
|
|
/* No cache available -- just reset index pointer to get fastest cache validation failure */
|
|
dst->fnpc_indx = (unsigned char)-1;
|
|
}
|