mirror of https://github.com/apache/lucene.git
- Added Unicode range to fix tokenization of Korean - http://issues.apache.org/jira/browse/LUCENE-444
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@294982 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae706faf12
commit
0eb4c32432
|
@ -112,6 +112,7 @@ TOKEN : { // token patterns
|
|||
"\u3300"-"\u337f",
|
||||
"\u3400"-"\u3d2d",
|
||||
"\u4e00"-"\u9fff",
|
||||
"\uac00"-"\ud7af",
|
||||
"\uf900"-"\ufaff"
|
||||
]
|
||||
>
|
||||
|
|
|
@ -41,7 +41,7 @@ private final void jjCheckNAddStates(int start)
|
|||
jjCheckNAdd(jjnextStates[start + 1]);
|
||||
}
|
||||
static final long[] jjbitVec0 = {
|
||||
0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
|
||||
0x1ff0000000000000L, 0xffffffffffffc000L, 0xfffff000ffffffffL, 0x6000000007fffffL
|
||||
};
|
||||
static final long[] jjbitVec2 = {
|
||||
0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
|
||||
|
@ -56,30 +56,33 @@ static final long[] jjbitVec5 = {
|
|||
0x3fffffffffffL, 0x0L, 0x0L, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec6 = {
|
||||
0x1600L, 0x0L, 0x0L, 0x0L
|
||||
0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec7 = {
|
||||
0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
|
||||
0x1600L, 0x0L, 0x0L, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec8 = {
|
||||
0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
|
||||
0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L
|
||||
};
|
||||
static final long[] jjbitVec9 = {
|
||||
0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
|
||||
0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L
|
||||
};
|
||||
static final long[] jjbitVec10 = {
|
||||
0x0L, 0xffc000000000L, 0x0L, 0x0L
|
||||
0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L
|
||||
};
|
||||
static final long[] jjbitVec11 = {
|
||||
0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
|
||||
0x0L, 0xffc000000000L, 0x0L, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec12 = {
|
||||
0x0L, 0x3ffL, 0x0L, 0x0L
|
||||
0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L
|
||||
};
|
||||
static final long[] jjbitVec13 = {
|
||||
0xfffffffeL, 0x0L, 0x0L, 0x0L
|
||||
0x0L, 0x3ffL, 0x0L, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec14 = {
|
||||
0xfffffffeL, 0x0L, 0x0L, 0x0L
|
||||
};
|
||||
static final long[] jjbitVec15 = {
|
||||
0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
|
||||
};
|
||||
private final int jjMoveNfa_0(int startState, int curPos)
|
||||
|
@ -978,6 +981,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
|
|||
return ((jjbitVec4[i2] & l2) != 0L);
|
||||
case 61:
|
||||
return ((jjbitVec5[i2] & l2) != 0L);
|
||||
case 215:
|
||||
return ((jjbitVec6[i2] & l2) != 0L);
|
||||
default :
|
||||
if ((jjbitVec0[i1] & l1) != 0L)
|
||||
return true;
|
||||
|
@ -989,18 +994,18 @@ private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, lo
|
|||
switch(hiByte)
|
||||
{
|
||||
case 6:
|
||||
return ((jjbitVec8[i2] & l2) != 0L);
|
||||
case 11:
|
||||
return ((jjbitVec9[i2] & l2) != 0L);
|
||||
case 13:
|
||||
case 11:
|
||||
return ((jjbitVec10[i2] & l2) != 0L);
|
||||
case 14:
|
||||
case 13:
|
||||
return ((jjbitVec11[i2] & l2) != 0L);
|
||||
case 16:
|
||||
case 14:
|
||||
return ((jjbitVec12[i2] & l2) != 0L);
|
||||
case 16:
|
||||
return ((jjbitVec13[i2] & l2) != 0L);
|
||||
default :
|
||||
if ((jjbitVec6[i1] & l1) != 0L)
|
||||
if ((jjbitVec7[i2] & l2) == 0L)
|
||||
if ((jjbitVec7[i1] & l1) != 0L)
|
||||
if ((jjbitVec8[i2] & l2) == 0L)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
|
@ -1012,9 +1017,9 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
|
|||
switch(hiByte)
|
||||
{
|
||||
case 0:
|
||||
return ((jjbitVec14[i2] & l2) != 0L);
|
||||
return ((jjbitVec15[i2] & l2) != 0L);
|
||||
default :
|
||||
if ((jjbitVec13[i1] & l1) != 0L)
|
||||
if ((jjbitVec14[i1] & l1) != 0L)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue