fis-gtm/sr_unix/gtm_conv.c

214 lines
7.1 KiB
C

/****************************************************************
* *
* Copyright 2006, 2011 Fidelity Information Services, Inc *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
* the license, please stop and do not read further. *
* *
****************************************************************/
#include "mdef.h"
#include "gtm_string.h"
#include "stringpool.h"
#include "gtm_caseconv.h"
#include "gtm_icu_api.h"
#include "gtm_utf8.h"
#include "gtm_conv.h"
LITDEF mstr chset_names[CHSET_MAX_IDX_ALL] =
{ /* Supported character set (CHSET) codes for the 3-argument form of $ZCONVERT.
* NOTE: Update the *_CHSET_LEN macros below if new CHSETs are added.
*/
{1, 1, "M"}, /* "M" should be the first CHSET (0th index of "chset_names" array). verify_chset() callers rely on this.
* $ZCONVERT doesn't support M, but I/O does */
{5, 5, "UTF-8"},
{6, 6, "UTF-16"},
{8, 8, "UTF-16LE"},
{8, 8, "UTF-16BE"},
{5, 5, "ASCII"},
{6, 6, "EBCDIC"},
{6, 6, "BINARY"}
};
#define MIN_CHSET_LEN 1 /* minimum length of CHSET names */
#define MAX_CHSET_LEN 8 /* maximum length of CHSET names */
/* This array holds the ICU converter handles corresponding to the respective
* CHSET name in the table chset_names[]
*/
GBLDEF UConverter *chset_desc[CHSET_MAX_IDX];
GBLDEF casemap_t casemaps[MAX_CASE_IDX] =
{ /* Supported case mappings and their disposal conversion routines for both $ZCHSET modes.
* Note: since UTF-8 disposal functions for "U" and "L" are ICU "function pointers" rather
* rather than their direct addresses, they are initialized in gtm_utf8_init() instead
*/
{"U", &lower_to_upper, NULL },
{"L", &upper_to_lower, NULL },
{"T", NULL, &gtm_strToTitle}
};
GBLREF spdesc stringpool;
LITREF unsigned char lower_to_upper_table[];
error_def(ERR_ICUERROR);
error_def(ERR_MAXSTRLEN);
/* Routine to verify given parameter against supported CHSETs.
* Valid arguments (case-insensitive):
* "M", "UTF-8", "UTF-16", "UTF-16LE" and "UTF-16BE"
* Returns
* -1 (if invalid argument) or
* 0 (if "M") or
* non-zero index to an entry of chset_names[] (if valid)
*/
int verify_chset(const mstr *parm)
{
const mstr *vptr, *vptr_top;
char mode[MAX_CHSET_LEN];
if ((MIN_CHSET_LEN > parm->len) || (MAX_CHSET_LEN < parm->len))
return -1; /* Parameter is smaller or larger than any possible CHSET */
/* Make a translated copy of the parm */
lower_to_upper((unsigned char *)mode, (unsigned char *)parm->addr, parm->len);
/* See if any of our possibilities match */
for (vptr = chset_names, vptr_top = vptr + CHSET_MAX_IDX_ALL; vptr < vptr_top; ++vptr)
{
if (parm->len == vptr->len &&
0 == memcmp(mode, vptr->addr, vptr->len))
return (int)(vptr - chset_names); /* return the index */
}
return -1;
}
/* Routine to verify given parameter against supported case conversion codes.
* Valid arguments (case-insensitive):
* "U", "L" and "T"
* Returns
* -1 (if invalid argument) or
* index to an entry of casemaps[] (if valid)
*/
int verify_case(const mstr *parm)
{
unsigned char c;
int index;
if (1 == parm->len)
{
c = lower_to_upper_table[*(uchar_ptr_t)parm->addr];
if (!gtm_utf8_mode && 'T' == c) /* title case is not supported in "M" mode */
return -1;
for (index = 0; index < MAX_CASE_IDX; ++index)
{
if (c == casemaps[index].code[0])
return index;
}
}
return -1;
}
int32_t gtm_strToTitle(UChar *dst, int32_t dstlen, const UChar *src, int32_t srclen,
const char *locale, UErrorCode *status)
{
return u_strToTitle(dst, dstlen, src, srclen, NULL, locale, status);
}
void callback_stop(const void* context, UConverterToUnicodeArgs *args, const char *codeUnits,
int32_t length, UConverterCallbackReason reason, UErrorCode *pErrorCode)
{
/* EMPTY BODY:
* By not resetting the pErrorCode, this routine returns to ICU routine directing
* it to stop and return immediately
*/
}
UConverter* get_chset_desc(const mstr* chset)
{
int chset_indx;
UErrorCode status;
if ((0 >= (chset_indx = verify_chset(chset))) || (CHSET_MAX_IDX <= chset_indx))
return NULL;
if (NULL == chset_desc[chset_indx])
{
status = U_ZERO_ERROR;
chset_desc[chset_indx] = ucnv_open(chset_names[chset_indx].addr, &status);
if (U_FAILURE(status))
rts_error(VARLSTCNT(3) ERR_ICUERROR, 1, status); /* strange and unexpected ICU unhappiness */
/* Initialize the callback for illegal/invalid characters, so that conversion
* stops at the first illegal character rather than continuing with replacement */
status = U_ZERO_ERROR;
ucnv_setToUCallBack(chset_desc[chset_indx], &callback_stop, NULL, NULL, NULL, &status);
if (U_FAILURE(status))
rts_error(VARLSTCNT(3) ERR_ICUERROR, 1, status); /* strange and unexpected ICU unhappiness */
}
return chset_desc[chset_indx];
}
/* Startup initializations of conversion data */
void gtm_conv_init(void)
{
assert(gtm_utf8_mode);
/* Implicitly created CHSET descriptor for UTF-8 */
get_chset_desc(&chset_names[CHSET_UTF8]);
assert(NULL != chset_desc[CHSET_UTF8]);
/* initialize the case conversion disposal functions */
casemaps[0].u = u_strToUpper;
casemaps[1].u = u_strToLower;
}
int gtm_conv(UConverter* from, UConverter* to, mstr *src, char* dstbuff, int* bufflen)
{
char *dstptr, *dstbase, *srcptr;
const char *ichset;
int dstlen, src_charlen, srclen;
UErrorCode status, status1;
if (0 == src->len)
return 0;
if (NULL == dstbuff)
{
/* Compute the stringpool buffer space needed for conversion given that source
* is encoded in the ichset representation. The ICU functions ucnv_getMinCharSize()
* and ucnv_getMaxCharSize() are used to compute the minimum and maximum number of
* bytes required per UChar if converted from/to ichset/ochset respectively
*/
src_charlen = (src->len / ucnv_getMinCharSize(from)) + 1; /* number of UChar's from ichset */
dstlen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
dstlen = (dstlen > MAX_STRLEN) ? MAX_STRLEN : dstlen;
ENSURE_STP_FREE_SPACE(dstlen);
dstbase = (char *)stringpool.free;
} else
{
dstbase = dstbuff;
dstlen = *bufflen;
}
srcptr = src->addr;
srclen = (int)src->len;
dstptr = dstbase;
status = U_ZERO_ERROR; /* initialization to "success" is required by ICU */
ucnv_convertEx(to, from, &dstptr, dstptr + dstlen, (const char**)&srcptr, srcptr + srclen,
NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
if (U_FAILURE(status))
{
if (U_BUFFER_OVERFLOW_ERROR == status)
{ /* translation requires more space than the maximum allowed GT.M string size */
if (NULL == dstbuff)
rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
else
{
/* Insufficient buffer passed. Return the required buffer length */
src_charlen = (srclen / ucnv_getMinCharSize(from)) + 1;
*bufflen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
return -1;
}
}
status1 = U_ZERO_ERROR;
ichset = ucnv_getName(from, &status1);
assert(U_SUCCESS(status1));
UTF8_BADCHAR(1,(unsigned char *) (srcptr - 1), NULL,STRLEN(ichset), ichset);
}
return (int) (dstptr - dstbase);
}