/****************************************************************
 *								*
 *	Copyright 2006, 2011 Fidelity Information Services, Inc	*
 *								*
 *	This source code contains the intellectual property	*
 *	of its copyright holder(s), and is made available	*
 *	under a license. If you do not know the terms of	*
 *	the license, please stop and do not read further.	*
 *								*
 ****************************************************************/
|
|
|
|
#include "mdef.h"
|
|
#include "gtm_string.h"
|
|
#include "stringpool.h"
|
|
#include "gtm_caseconv.h"
|
|
#include "gtm_icu_api.h"
|
|
#include "gtm_utf8.h"
|
|
#include "gtm_conv.h"
|
|
|
|
LITDEF mstr chset_names[CHSET_MAX_IDX_ALL] =
|
|
{ /* Supported character set (CHSET) codes for the 3-argument form of $ZCONVERT.
|
|
* NOTE: Update the *_CHSET_LEN macros below if new CHSETs are added.
|
|
*/
|
|
{1, 1, "M"}, /* "M" should be the first CHSET (0th index of "chset_names" array). verify_chset() callers rely on this.
|
|
* $ZCONVERT doesn't support M, but I/O does */
|
|
{5, 5, "UTF-8"},
|
|
{6, 6, "UTF-16"},
|
|
{8, 8, "UTF-16LE"},
|
|
{8, 8, "UTF-16BE"},
|
|
{5, 5, "ASCII"},
|
|
{6, 6, "EBCDIC"},
|
|
{6, 6, "BINARY"}
|
|
};
|
|
#define MIN_CHSET_LEN 1 /* minimum length of CHSET names */
|
|
#define MAX_CHSET_LEN 8 /* maximum length of CHSET names */
|
|
|
|
/* This array holds the ICU converter handles corresponding to the respective
|
|
* CHSET name in the table chset_names[]
|
|
*/
|
|
GBLDEF UConverter *chset_desc[CHSET_MAX_IDX];
|
|
GBLDEF casemap_t casemaps[MAX_CASE_IDX] =
|
|
{ /* Supported case mappings and their disposal conversion routines for both $ZCHSET modes.
|
|
* Note: since UTF-8 disposal functions for "U" and "L" are ICU "function pointers" rather
|
|
* rather than their direct addresses, they are initialized in gtm_utf8_init() instead
|
|
*/
|
|
{"U", &lower_to_upper, NULL },
|
|
{"L", &upper_to_lower, NULL },
|
|
{"T", NULL, >m_strToTitle}
|
|
};
|
|
|
|
GBLREF spdesc stringpool;
|
|
|
|
LITREF unsigned char lower_to_upper_table[];
|
|
|
|
error_def(ERR_ICUERROR);
|
|
error_def(ERR_MAXSTRLEN);
|
|
|
|
/* Routine to verify given parameter against supported CHSETs.
|
|
* Valid arguments (case-insensitive):
|
|
* "M", "UTF-8", "UTF-16", "UTF-16LE" and "UTF-16BE"
|
|
* Returns
|
|
* -1 (if invalid argument) or
|
|
* 0 (if "M") or
|
|
* non-zero index to an entry of chset_names[] (if valid)
|
|
*/
|
|
int verify_chset(const mstr *parm)
|
|
{
|
|
const mstr *vptr, *vptr_top;
|
|
char mode[MAX_CHSET_LEN];
|
|
|
|
if ((MIN_CHSET_LEN > parm->len) || (MAX_CHSET_LEN < parm->len))
|
|
return -1; /* Parameter is smaller or larger than any possible CHSET */
|
|
/* Make a translated copy of the parm */
|
|
lower_to_upper((unsigned char *)mode, (unsigned char *)parm->addr, parm->len);
|
|
/* See if any of our possibilities match */
|
|
for (vptr = chset_names, vptr_top = vptr + CHSET_MAX_IDX_ALL; vptr < vptr_top; ++vptr)
|
|
{
|
|
if (parm->len == vptr->len &&
|
|
0 == memcmp(mode, vptr->addr, vptr->len))
|
|
return (int)(vptr - chset_names); /* return the index */
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* Routine to verify given parameter against supported case conversion codes.
|
|
* Valid arguments (case-insensitive):
|
|
* "U", "L" and "T"
|
|
* Returns
|
|
* -1 (if invalid argument) or
|
|
* index to an entry of casemaps[] (if valid)
|
|
*/
|
|
int verify_case(const mstr *parm)
|
|
{
|
|
unsigned char c;
|
|
int index;
|
|
|
|
if (1 == parm->len)
|
|
{
|
|
c = lower_to_upper_table[*(uchar_ptr_t)parm->addr];
|
|
if (!gtm_utf8_mode && 'T' == c) /* title case is not supported in "M" mode */
|
|
return -1;
|
|
for (index = 0; index < MAX_CASE_IDX; ++index)
|
|
{
|
|
if (c == casemaps[index].code[0])
|
|
return index;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int32_t gtm_strToTitle(UChar *dst, int32_t dstlen, const UChar *src, int32_t srclen,
|
|
const char *locale, UErrorCode *status)
|
|
{
|
|
return u_strToTitle(dst, dstlen, src, srclen, NULL, locale, status);
|
|
}
|
|
|
|
void callback_stop(const void* context, UConverterToUnicodeArgs *args, const char *codeUnits,
|
|
int32_t length, UConverterCallbackReason reason, UErrorCode *pErrorCode)
|
|
{
|
|
/* EMPTY BODY:
|
|
* By not resetting the pErrorCode, this routine returns to ICU routine directing
|
|
* it to stop and return immediately
|
|
*/
|
|
}
|
|
|
|
UConverter* get_chset_desc(const mstr* chset)
|
|
{
|
|
int chset_indx;
|
|
UErrorCode status;
|
|
|
|
if ((0 >= (chset_indx = verify_chset(chset))) || (CHSET_MAX_IDX <= chset_indx))
|
|
return NULL;
|
|
if (NULL == chset_desc[chset_indx])
|
|
{
|
|
status = U_ZERO_ERROR;
|
|
chset_desc[chset_indx] = ucnv_open(chset_names[chset_indx].addr, &status);
|
|
if (U_FAILURE(status))
|
|
rts_error(VARLSTCNT(3) ERR_ICUERROR, 1, status); /* strange and unexpected ICU unhappiness */
|
|
/* Initialize the callback for illegal/invalid characters, so that conversion
|
|
* stops at the first illegal character rather than continuing with replacement */
|
|
status = U_ZERO_ERROR;
|
|
ucnv_setToUCallBack(chset_desc[chset_indx], &callback_stop, NULL, NULL, NULL, &status);
|
|
if (U_FAILURE(status))
|
|
rts_error(VARLSTCNT(3) ERR_ICUERROR, 1, status); /* strange and unexpected ICU unhappiness */
|
|
}
|
|
return chset_desc[chset_indx];
|
|
}
|
|
|
|
/* Startup initializations of conversion data */
|
|
void gtm_conv_init(void)
|
|
{
|
|
assert(gtm_utf8_mode);
|
|
/* Implicitly created CHSET descriptor for UTF-8 */
|
|
get_chset_desc(&chset_names[CHSET_UTF8]);
|
|
assert(NULL != chset_desc[CHSET_UTF8]);
|
|
/* initialize the case conversion disposal functions */
|
|
casemaps[0].u = u_strToUpper;
|
|
casemaps[1].u = u_strToLower;
|
|
}
|
|
|
|
int gtm_conv(UConverter* from, UConverter* to, mstr *src, char* dstbuff, int* bufflen)
|
|
{
|
|
char *dstptr, *dstbase, *srcptr;
|
|
const char *ichset;
|
|
int dstlen, src_charlen, srclen;
|
|
UErrorCode status, status1;
|
|
|
|
if (0 == src->len)
|
|
return 0;
|
|
if (NULL == dstbuff)
|
|
{
|
|
/* Compute the stringpool buffer space needed for conversion given that source
|
|
* is encoded in the ichset representation. The ICU functions ucnv_getMinCharSize()
|
|
* and ucnv_getMaxCharSize() are used to compute the minimum and maximum number of
|
|
* bytes required per UChar if converted from/to ichset/ochset respectively
|
|
*/
|
|
src_charlen = (src->len / ucnv_getMinCharSize(from)) + 1; /* number of UChar's from ichset */
|
|
dstlen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
|
|
dstlen = (dstlen > MAX_STRLEN) ? MAX_STRLEN : dstlen;
|
|
ENSURE_STP_FREE_SPACE(dstlen);
|
|
dstbase = (char *)stringpool.free;
|
|
} else
|
|
{
|
|
dstbase = dstbuff;
|
|
dstlen = *bufflen;
|
|
}
|
|
srcptr = src->addr;
|
|
srclen = (int)src->len;
|
|
dstptr = dstbase;
|
|
status = U_ZERO_ERROR; /* initialization to "success" is required by ICU */
|
|
ucnv_convertEx(to, from, &dstptr, dstptr + dstlen, (const char**)&srcptr, srcptr + srclen,
|
|
NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
|
|
if (U_FAILURE(status))
|
|
{
|
|
if (U_BUFFER_OVERFLOW_ERROR == status)
|
|
{ /* translation requires more space than the maximum allowed GT.M string size */
|
|
if (NULL == dstbuff)
|
|
rts_error(VARLSTCNT(1) ERR_MAXSTRLEN);
|
|
else
|
|
{
|
|
/* Insufficient buffer passed. Return the required buffer length */
|
|
src_charlen = (srclen / ucnv_getMinCharSize(from)) + 1;
|
|
*bufflen = UCNV_GET_MAX_BYTES_FOR_STRING(src_charlen, ucnv_getMaxCharSize(to));
|
|
return -1;
|
|
}
|
|
}
|
|
status1 = U_ZERO_ERROR;
|
|
ichset = ucnv_getName(from, &status1);
|
|
assert(U_SUCCESS(status1));
|
|
UTF8_BADCHAR(1,(unsigned char *) (srcptr - 1), NULL,STRLEN(ichset), ichset);
|
|
}
|
|
return (int) (dstptr - dstbase);
|
|
}
|