mirror of https://github.com/apache/lucene.git
LUCENE-1490: Fix Latin-1 conversion of HALFWIDTH_AND_FULLWIDTH_FORMS characters so that it applies only to the correct subset (code points 65281–65374, i.e. U+FF01–U+FF5E)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@755666 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
68f4d8b1a8
commit
96863198a5
|
@ -148,11 +148,13 @@ public final class CJKTokenizer extends Tokenizer {
|
||||||
|| (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
|
|| (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
|
||||||
) {
|
) {
|
||||||
if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
|
if (ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
|
||||||
/** convert HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN */
|
|
||||||
int i = (int) c;
|
int i = (int) c;
|
||||||
|
if (i >= 65281 && i <= 65374) {
|
||||||
|
/** convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN */
|
||||||
i = i - 65248;
|
i = i - 65248;
|
||||||
c = (char) i;
|
c = (char) i;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// if the current character is a letter or "_" "+" "#"
|
// if the current character is a letter or "_" "+" "#"
|
||||||
if (Character.isLetterOrDigit(c)
|
if (Character.isLetterOrDigit(c)
|
||||||
|
|
Loading…
Reference in New Issue