fis-gtm/sr_unix/gtm_utf8_stx.c

92 lines
2.5 KiB
C

/****************************************************************
* *
* Copyright 2006, 2007 Fidelity Information Services, Inc.*
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_string.h"
#include "gtm_stdlib.h"
#include "error.h"
#include "util.h"
#include "gtm_icu_api.h"
#include "gtm_utf8.h"
/* When nonzero, utf8_len_stx takes its non-validating path, so no bad-character error is raised from there */
GBLREF boolean_t badchar_inhibit;
/* Both routines in this file assert this flag before doing any work */
GBLREF boolean_t gtm_utf8_mode;
/* Error code that utf8_badchar_stx passes to stx_error */
error_def(ERR_BADCHAR);
/* Compile-time flavor of "utf8_len": counts the characters held in the byte range described by "str".
 * The difference from "utf8_len" is that an invalid sequence is reported through the UTF8_BADCHAR_STX
 * macro (a stx_error) rather than through an rts_error.
 *
 * Returns the character count, which is also stored in str->char_len.
 * Returns -1, leaving str->char_len untouched, if UTF8_BADCHAR_STX had to be invoked (parse error).
 */
int utf8_len_stx(mstr* str)
{
	char	*cur, *end;
	int	nchars, nbytes;

	assert(gtm_utf8_mode);
	cur = str->addr;
	end = cur + str->len;
	nchars = 0;
	if (badchar_inhibit)
	{	/* No validation requested: just step from one character to the next */
		while (cur < end)
		{
			cur = (char *)UTF8_MBNEXT(cur, end);
			nchars++;
		}
	} else
	{	/* Validate each character; UTF8_VALID stores its byte length in nbytes */
		while (cur < end)
		{
			if (!UTF8_VALID(cur, end, nbytes))
			{
				UTF8_BADCHAR_STX(0, cur, end, 0, NULL);
				return -1;
			}
			cur += nbytes;
			nchars++;
		}
	}
	assert(cur == end);
	str->char_len = nchars;
	return nchars;
}
/* This function is the same as "utf8_badchar" except that it does a "stx_error" instead of "rts_error". This helps
 * to identify the line in the M program that has the compile time error.
 *
 * The offending bytes are formatted as a comma-separated list of numbers (via i2asc) and passed to stx_error
 * together with a character set name.
 *
 *	len		- number of bytes to report starting at "str"; if 0, the length is determined here by scanning
 *			  forward until UTF8_VALID succeeds, the scan bound is hit, or "strtop" is reached.
 *	str		- first byte of the invalid sequence.
 *	strtop		- one past the last byte that may be examined (used only when len is 0).
 *	chset_len/chset	- length and address of the character set name to report; when chset_len is not positive,
 *			  UTF8_NAME is reported instead.
 */
void utf8_badchar_stx(int len, unsigned char *str, unsigned char *strtop, int chset_len, unsigned char *chset)
{
	unsigned char	*strptr, *strend, *outstr, *outtop;
	unsigned char	errtxt[OUT_BUFF_SIZE];
	int		tmplen;		/* receives the byte length from UTF8_VALID; not otherwise needed */

	assert(gtm_utf8_mode);
	if (len == 0)
	{	/* Determine the maximal length of the invalid byte sequence.
		 * NOTE(review): the "len <= 4" bound lets this scan cover up to five bytes although the original
		 * comment said "upto 4 bytes". Left unchanged so the reported text stays as it has always been;
		 * confirm the intended limit against utf8_badchar.
		 */
		for (strend = str; len <= 4 && strend < strtop; ++strend, ++len)
		{
			if (UTF8_VALID(strend, strtop, tmplen))
				break;
		}
	} else
		strend = str + len;
	outstr = &errtxt[0];
	outtop = outstr + sizeof(errtxt);
	/* Each byte takes at most 4 characters of output: the separating comma plus the digits written by i2asc
	 * (assumed to be at most three decimal digits for a byte value - TODO confirm). Stop formatting once fewer
	 * than 4 characters remain so that a large caller-supplied "len" cannot overrun errtxt.
	 */
	for (strptr = str; (strptr < strend) && (4 <= (outtop - outstr)); ++strptr, ++outstr)
	{
		outstr = (unsigned char*)i2asc((uchar_ptr_t)outstr, *strptr);
		*outstr = ',';
	}
	if (outstr > &errtxt[0])	/* something was written: do not include the last comma */
		outstr--;
	if (chset_len > 0)
		stx_error(ERR_BADCHAR, 4, (outstr - &errtxt[0]), &errtxt[0], chset_len, chset);
	else
		stx_error(ERR_BADCHAR, 4, (outstr - &errtxt[0]), &errtxt[0], LEN_AND_LIT(UTF8_NAME));
}