fis-gtm/sr_unix/op_fnextract.h

100 lines
3.1 KiB
C

/****************************************************************
* *
* Copyright 2008 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
/* Note: this routine is built as op_fnextract() on all platforms except IA64,
where it is instead built as op_fnextract2() because of linkage requirements:
the values in the transfer table must be consistently assembler or "C" so that
the correct call signature is used. Since the transfer table entry for this
routine is changed under certain conditions (unicode or not), the interface
needs to be consistent, so an assembler stub is made to call this C routine
to match the alternate op_fnzextract that could be in that transfer table
slot.
*/
/* Flag read by OP_FNEXTRACT below: when non-zero, the function does not call UTF8_BADCHAR for
 * invalid byte sequences it scans over and skips the MV_FORCE_LEN validation of the result.
 */
GBLREF boolean_t badchar_inhibit;
/* Extract the characters at positions 'first' through 'last' (1-based, inclusive) of 'src' into 'dest'
 * (usages like $E(str,first,last)).
 *
 * Parameters:
 *	last	- position of the last character wanted; values beyond the string are clamped to its end.
 *	first	- position of the first character wanted; values <= 0 are treated as 1.
 *	src	- source value; it is forced to a string (MV_FORCE_STR) before use.
 *	dest	- result value; it is (re)initialized here and marked MV_STR. When characters are selected,
 *		  dest->str.addr points into the buffer of 'src' (nothing is copied). When the requested
 *		  range selects nothing, dest->str.len is set to 0.
 *
 * Unless badchar_inhibit is set, invalid byte sequences encountered while scanning are reported through
 * UTF8_BADCHAR (or through MV_FORCE_LEN on the paths that do not scan character by character).
 */
void OP_FNEXTRACT(int last, int first, mval *src, mval *dest)
{
	char	*srcbase, *srctop, *srcptr;
	int	len, skip, bytelen;

	MV_FORCE_STR(src);
	MV_INIT(dest);
	dest->mvtype = MV_STR;
	if (first <= 0)
		first = 1;
	else if (first > src->str.len)
	{	/* 'first' exceeds even the byte length of the string, so it is past the last character */
		dest->str.len = 0;
		return;
	}
	if (last > 0 && last > src->str.len)
		last = src->str.len;
	/* From here on first >= 1. The emptiness test below compares 'last' with 'first' BEFORE subtracting,
	 * because "last - first + 1" overflows a signed int when 'last' is a very large negative number.
	 */
	if (MV_IS_SINGLEBYTE(src))
	{	/* fast-path extraction of an entirely single byte string */
		if (last >= first)
		{
			len = last - first + 1;
			dest->str.addr = src->str.addr + first - 1;
			dest->str.len = len;
			if (badchar_inhibit)
			{	/* no validation wanted: character length equals byte length on this path */
				dest->str.char_len = dest->str.len;
				dest->mvtype |= MV_UTF_LEN;
			} else
				MV_FORCE_LEN(dest);	/* catch BADCHARs (if any) */
		} else
			dest->str.len = 0;
	} else
	{	/* generic extraction of a multi-byte string */
		if (last < first)
		{
			dest->str.len = 0;
			return;
		}
		len = last - first + 1;
		srcbase = src->str.addr;
		srctop = srcbase + src->str.len;
		for (srcptr = srcbase, skip = first - 1; (skip > 0 && srcptr < srctop); --skip)
		{	/* skip to the character position 'first' */
			if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit)
				UTF8_BADCHAR(0, srcptr, srctop, 0, NULL);
			srcptr += bytelen;	/* bytelen is set by UTF8_VALID above */
		}
		assert(srcptr <= srctop);
		if (skip > 0)
		{	/* first position is past the last character */
			dest->str.len = 0;
			return;
		}
		dest->str.addr = srcbase = srcptr;
		/* Compare lengths rather than forming "srcbase + len": after skipping multi-byte characters that
		 * pointer could lie more than one byte past the end of the buffer, which is undefined behavior.
		 */
		if (len >= srctop - srcbase)
		{	/* A more efficient implementation of usages like $E(str,99999) where there is no need */
			/* to scan the rest of the string unless BADCHAR errors need to be caught */
			dest->str.len = INTCAST(srctop - srcbase);
			if (!badchar_inhibit)
				MV_FORCE_LEN(dest);
		} else
		{	/* Skip the next 'len' characters and trigger BADCHAR if need to be caught */
			for (skip = len; (skip > 0 && srcptr < srctop); --skip)
			{
				if (!UTF8_VALID(srcptr, srctop, bytelen) && !badchar_inhibit)
					UTF8_BADCHAR(0, srcptr, srctop, 0, NULL);
				srcptr += bytelen;
			}
			assert(srcptr <= srctop);
			dest->str.len = INTCAST(srcptr - srcbase);
			dest->str.char_len = len - skip;	/* characters actually consumed by the loop above */
			dest->mvtype |= MV_UTF_LEN;
		}
	}
}