fis-gtm/sr_unix/op_setp1.c

324 lines
12 KiB
C
Raw Normal View History

/****************************************************************
* *
* Copyright 2006, 2012 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_string.h"
#include "stringpool.h"
#include "min_max.h"
#include "fnpc.h"
#include "op.h"
#include "gtm_utf8.h"
GBLREF spdesc stringpool;
GBLREF boolean_t gtm_utf8_mode; /* We are indeed doing the UTF8 thang */
GBLREF boolean_t badchar_inhibit; /* No BADCHAR errors should be signaled */
/* Debug-only instrumentation used by op_setp1().
 *
 *	SETWON / SETWOFF	- set / clear the setp_work flag (done around the op_fnp1() calls)
 *	COUNT_EVENT(x)		- bump event counter x by one
 *	INCR_COUNT(x,y)		- add y to counter x
 *
 * In non-DEBUG builds every macro expands to nothing. The DEBUG expansions are
 * parenthesized expressions WITHOUT a trailing semicolon: the caller always
 * supplies the ';' so the macros behave like ordinary statements (including
 * inside an unbraced if/else) in both build flavors.
 */
#ifdef DEBUG
GBLREF	boolean_t	setp_work;
GBLREF	int		cs_small;	/* scanned small string brute force */
GBLREF	int		cs_small_pcs;	/* chars scanned by small scan */
# define SETWON			(setp_work = TRUE)
# define SETWOFF		(setp_work = FALSE)
# define COUNT_EVENT(x)		(++(x))
# define INCR_COUNT(x,y)	((x) += (y))
#else
# define SETWON
# define SETWOFF
# define COUNT_EVENT(x)
# define INCR_COUNT(x,y)
#endif
error_def(ERR_MAXSTRLEN);
/*
 * ----------------------------------------------------------
 * Fast path setpiece when the delimiter is one (literal) character and a
 * single piece is being replaced (last is same as first). Unicode (UTF-8)
 * flavor: gtm_utf8_mode is asserted and the delimiter is handled as a
 * sequence of dlmlen bytes rather than as a single byte.
 *
 * The result is assembled at stringpool.free as
 *	<prefix of src> <padding delimiters> <expr> <suffix of src>
 * and stringpool.free is advanced past it. Where possible the piece cache
 * (fnpc) entry built by op_fnp1() for src is used to locate the piece, and
 * the prefix portion of that cache entry is copied to a new cache entry for
 * the result.
 *
 * Arguments:
 *	src	- source mval; if not defined it is treated as a null string
 *	delim	- delimiter char (its bytes are extracted through a delimfmt)
 *	expr	- expression string mval (forced to a string here)
 *	ind	- index (1 based piece number) in source mval to be set
 *	dst	- destination mval where the result is saved
 *
 * Return:
 *	none
 *
 * Errors:
 *	- A bad-character error is signaled (UTF8_BADCHAR / utf8_badchar) for an
 *	  invalid delimiter or an invalid character met while scanning src,
 *	  unless badchar_inhibit is set.
 *	- ERR_MAXSTRLEN if the resulting string would exceed MAX_STRLEN.
 * ----------------------------------------------------------
 */
void op_setp1(mval *src, int delim, mval *expr, int ind, mval *dst)
{
	size_t		str_len, delim_cnt;
	int		len, pfx_str_len, sfx_start_offset, sfx_str_len, rep_str_len, pfx_scan_offset;
	int		dlmlen, cpy_cache_lines, mblen;
	unsigned char	*start_sfx, *str_addr, *end_pfx, *end_src, *start_pfx;
	boolean_t	do_scan, delim_last_scan, valid_char;
	mval		dummymval;	/* Its value is not used but is part of the call to op_fnp1() */
	fnpc		*cfnpc, *pfnpc;
	delimfmt	ldelim;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gtm_utf8_mode);
	do_scan = FALSE;
	cpy_cache_lines = -1;		/* -1 => no cache entry of src is available to copy for dst */
	ldelim.unichar_val = delim;
	/* Note UTF8_VALID also sets dlmlen, the byte length of the delimiter, which is used throughout */
	if (!UTF8_VALID(ldelim.unibytes_val, (ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val)), dlmlen)
	    && !badchar_inhibit)
	{	/* The delimiter is a bad character so error out if badchar not inhibited */
		UTF8_BADCHAR(0, ldelim.unibytes_val, ldelim.unibytes_val + SIZEOF(ldelim.unibytes_val), 0, NULL);
	}
	MV_FORCE_STR(expr);	/* Expression to put into piece place */
	if (MV_DEFINED(src))
	{
		/* We have 3 possible scenarios:
		 * 1) The source string is null. Nothing to do but proceed to building output.
		 * 2) If the requested piece is larger than can be cached by op_fnp1, call fnp1
		 *    for the maximum piece possible, use the cache info to "prime the pump" and
		 *    then process the rest of the string ourselves.
		 * 3) If the requested piece can be obtained from the cache, call op_fnp1 to validate
		 *    and rebuild the cache if necessary and then retrieve the necessary info from
		 *    the fnpc cache.
		 */
		MV_FORCE_STR(src);	/* Make sure is string prior to length check */
		if (0 == src->str.len)
		{	/* We have a null source string */
			pfx_str_len = sfx_str_len = sfx_start_offset = 0;
			delim_cnt = (0 < ind) ? (size_t)ind - 1 : 0;
		} else if (FNPC_ELEM_MAX >= ind)
		{	/* 3) Best of all possible cases. The op_fnp1 can do most of our work for us
			 * and we can preload the cache on the new string to help its subsequent
			 * uses along as well.
			 */
			SETWON;
			op_fnp1(src, delim, ind, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			/* Three more scenarios: #1 piece all in cache, #2 piece would be in cache but ran
			 * out of text or #3 piece is beyond what can be cached
			 */
			if (cfnpc->npcs >= ind)
			{	/* #1 The piece we want is totally within the cache which is good news */
				pfx_str_len = cfnpc->pstart[ind - 1];
				delim_cnt = 0;
				sfx_start_offset = cfnpc->pstart[ind] - dlmlen;				/* Include delimiter */
				rep_str_len = cfnpc->pstart[ind] - cfnpc->pstart[ind - 1] - dlmlen;	/* Replace string length */
				sfx_str_len = src->str.len - pfx_str_len - rep_str_len;
				cpy_cache_lines = ind - 1;
			} else
			{	/* #2 The string was too short so the cache does not contain our string. This means
				 * that the prefix becomes any text that IS in the cache and we set the delim_cnt
				 * to be the number of missing pieces so the delimiters can be put in as part of the
				 * prefix when we build the new string.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			}
		} else
		{	/* 2) We have an element that would not be able to be in the fnpc cache. Go ahead
			 * and call op_fnp1 to get cache info up to the maximum and then we will continue
			 * the scan on our own.
			 */
			SETWON;
			op_fnp1(src, delim, FNPC_ELEM_MAX, &dummymval);
			SETWOFF;
			cfnpc = &(TREF(fnpca)).fnpcs[src->fnpc_indx - 1];
			assert(cfnpc->last_str.addr == src->str.addr);
			assert(cfnpc->last_str.len == src->str.len);
			assert(cfnpc->delim == delim);
			assert(0 < cfnpc->npcs);
			if (FNPC_ELEM_MAX > cfnpc->npcs)
			{	/* We ran out of text so the scan is complete. This is basically the same
				 * as case #2 above.
				 */
				pfx_str_len = cfnpc->pstart[cfnpc->npcs] - dlmlen;
				delim_cnt = (size_t)(ind - cfnpc->npcs);
				sfx_start_offset = 0;
				sfx_str_len = 0;
				cpy_cache_lines = cfnpc->npcs;
			} else
			{	/* We have a case where the piece we want cannot be kept in cache. In the special
				 * case where there is no more text to handle, we don't need to scan further. Otherwise
				 * we prime the pump and continue the scan where the cache left off.
				 */
				if ((pfx_scan_offset = cfnpc->pstart[FNPC_ELEM_MAX]) < src->str.len)	/* Note assignment */
					/* Normal case where we prime the pump */
					do_scan = TRUE;
				else
				{	/* Special case -- no more text to scan */
					pfx_str_len = cfnpc->pstart[FNPC_ELEM_MAX] - dlmlen;
					sfx_start_offset = 0;
					sfx_str_len = 0;
				}
				delim_cnt = (size_t)ind - FNPC_ELEM_MAX;
				cpy_cache_lines = FNPC_ELEM_MAX;
			}
		}
	} else
	{	/* Source is not defined -- treat as a null string. Guard against ind <= 0 the same way
		 * the null source string case above does: delim_cnt is unsigned so an unguarded
		 * "(size_t)ind - 1" would wrap to a huge delimiter count.
		 */
		pfx_str_len = sfx_str_len = sfx_start_offset = 0;
		delim_cnt = (0 < ind) ? (size_t)ind - 1 : 0;
	}
	/* If we have been forced to do our own scan, do that here. Note the variable pfx_scan_offset has been
	 * set to where the scan should begin in the src string and delim_cnt has been set to how many delimiters
	 * still need to be processed.
	 */
	if (do_scan)
	{	/* Scan the line isolating prefix piece, and end of the
		 * piece being replaced
		 */
		COUNT_EVENT(cs_small);
		end_pfx = start_sfx = (unsigned char *)src->str.addr + pfx_scan_offset;
		end_src = (unsigned char *)src->str.addr + src->str.len;
		/* The compiler would unroll this loop this way anyway but we want to
		 * adjust the start_sfx pointer after the loop but only if we have gone
		 * into it at least once.
		 */
		if ((0 < delim_cnt) && (start_sfx < end_src))
		{
			do
			{
				end_pfx = start_sfx;
				delim_last_scan = FALSE;	/* Whether delimiter is last character scanned */
				while (start_sfx < end_src)
				{
					valid_char = UTF8_VALID(start_sfx, end_src, mblen);	/* Length of next char */
					if (!valid_char)
					{	/* Next character is not valid unicode. If badchar error is not inhibited,
						 * signal it now. If it is inhibited, just treat the character as a single
						 * character and continue.
						 */
						if (!badchar_inhibit)
							utf8_badchar(0, start_sfx, end_src, 0, NULL);
						assert(1 == mblen);
					}
					/* Getting mblen first allows us to do quick length compare before the
					 * heavier weight memcmp call.
					 */
					assert(0 < mblen);
					if (mblen == dlmlen && 0 == memcmp(start_sfx, ldelim.unibytes_val, dlmlen))
					{
						delim_last_scan = TRUE;
						break;
					}
					/* Increment ptrs by size of found char */
					start_sfx += mblen;
				}
				/* Step over the delimiter (or past end_src if none was found) */
				start_sfx += dlmlen;
				delim_cnt--;
			} while ((0 < delim_cnt) && (start_sfx < end_src));
			/* We have to back up the suffix start pointer except under the condition
			 * that the last character in the buffer is the last delimiter we were looking
			 * for.
			 */
			if ((0 == delim_cnt) || (start_sfx < end_src) || !delim_last_scan)
				start_sfx -= dlmlen;		/* Back up suffix to include delimiter char */
			/* If we scanned to the end (no text left) and still have delimiters to
			 * find, the entire src text should be part of the prefix
			 */
			if ((start_sfx >= end_src) && (0 < delim_cnt))
			{
				end_pfx = start_sfx;
				if (delim_last_scan)		/* if last char was delim, reduce delim cnt */
					--delim_cnt;
			}
		} else
		{
			/* If not doing any token finding, then this count becomes the number
			 * of tokens to output. Adjust accordingly.
			 */
			if (0 < delim_cnt)
				--delim_cnt;
		}
		INCR_COUNT(cs_small_pcs, (int)((size_t)ind - delim_cnt));
		/* Now having the following situation:
		 * end_pfx	-> end of the prefix piece including delimiter
		 * start_sfx	-> start of suffix piece (with delimiter) or = end_pfx/src->str.addr if none
		 *
		 * Both are converted to offsets/lengths relative to src->str.addr here, before space is
		 * requested from the stringpool below.
		 */
		pfx_str_len = (int)(end_pfx - (unsigned char *)src->str.addr);
		if (0 > pfx_str_len)
			pfx_str_len = 0;
		sfx_start_offset = (int)(start_sfx - (unsigned char *)src->str.addr);
		sfx_str_len = src->str.len - sfx_start_offset;
		if (0 > sfx_str_len)
			sfx_str_len = 0;
	}
	/* Calculate total string len. delim_cnt has needed padding delimiters for null fields */
	str_len = (size_t)expr->str.len + (size_t)pfx_str_len + (delim_cnt * (size_t)dlmlen) + (size_t)sfx_str_len;
	if (MAX_STRLEN < str_len)
		rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
	ENSURE_STP_FREE_SPACE((int)str_len);
	str_addr = stringpool.free;
	start_pfx = (unsigned char *)src->str.addr;	/* Fetched only after the space request above */
	/* copy prefix */
	if (0 < pfx_str_len)
	{
		memcpy(str_addr, src->str.addr, pfx_str_len);
		str_addr += pfx_str_len;
	}
	/* copy delimiters */
	while (0 < delim_cnt--)
	{
		memcpy(str_addr, ldelim.unibytes_val, dlmlen);
		str_addr += dlmlen;
	}
	/* copy expression */
	if (0 < expr->str.len)
	{
		memcpy(str_addr, expr->str.addr, expr->str.len);
		str_addr += expr->str.len;
	}
	/* copy suffix */
	if (0 < sfx_str_len)
	{
		memcpy(str_addr, start_pfx + sfx_start_offset, sfx_str_len);
		str_addr += sfx_str_len;
	}
	assert((str_addr - stringpool.free) == str_len);
	dst->mvtype = MV_STR;
	dst->str.len = INTCAST(str_addr - stringpool.free);
	dst->str.addr = (char *)stringpool.free;
	stringpool.free = str_addr;
	/* If available, update the cache information for this newly created mval to hopefully
	 * give it a head start on its next usage. Note that we can only copy over the cache info
	 * for the prefix. We cannot include information for the 'expression' except where it starts
	 * because the expression could itself contain delimiters that would be found on a rescan.
	 */
	if (0 < cpy_cache_lines)
	{
		pfnpc = cfnpc;				/* pointer for src mval's cache */
		do
		{
			cfnpc = (TREF(fnpca)).fnpcsteal;	/* Next cache element to steal */
			if ((TREF(fnpca)).fnpcmax < cfnpc)
				cfnpc = &(TREF(fnpca)).fnpcs[0];	/* Wrap back to the first element */
			(TREF(fnpca)).fnpcsteal = cfnpc + 1;	/* -> next element to steal */
		} while (cfnpc == pfnpc);		/* Make sure we don't step on ourselves */
		cfnpc->last_str = dst->str;		/* Save validation info */
		cfnpc->delim = delim;
		cfnpc->npcs = cpy_cache_lines;
		dst->fnpc_indx = cfnpc->indx + 1;	/* Save where we are putting this element
							 * (1 based index in mval so 0 isn't so common)
							 */
		/* npcs pieces need npcs + 1 start offsets */
		memcpy(&cfnpc->pstart[0], &pfnpc->pstart[0], (cfnpc->npcs + 1) * SIZEOF(unsigned int));
	} else
		/* No cache available -- just reset index pointer to get fastest cache validation failure */
		dst->fnpc_indx = (unsigned char)-1;
}