mirror of https://github.com/apache/lucene.git
LUCENE-8603 Fix the inversion of right ids for additional nouns in the Korean user dictionary
This commit is contained in:
parent
a62b5941b0
commit
53b05c5e7e
|
@ -237,9 +237,8 @@ Build
|
|||
|
||||
Bug fixes:
|
||||
|
||||
* LUCENE-8548: The KoreanTokenizer no longer splits unknown words on combining diacritics and
|
||||
detects script boundaries more accurately with Character#UnicodeScript#of.
|
||||
(Christophe Bismuth, Jim Ferenczi)
|
||||
* LUCENE-8603: Fix the inversion of right ids for additional nouns in the Korean user dictionary.
|
||||
(Yoo Jeongin via Jim Ferenczi)
|
||||
|
||||
New Features
|
||||
|
||||
|
@ -265,6 +264,10 @@ Improvements
|
|||
* LUCENE-8575: SegmentInfos#toString now includes attributes and diagnostics.
|
||||
(Namgyu Kim via Adrien Grand)
|
||||
|
||||
* LUCENE-8548: The KoreanTokenizer no longer splits unknown words on combining diacritics and
|
||||
detects script boundaries more accurately with Character#UnicodeScript#of.
|
||||
(Christophe Bismuth, Jim Ferenczi)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-8552: FieldInfos.getMergedFieldInfos no longer does any merging if there is <= 1 segment.
|
||||
|
|
|
@ -43,11 +43,11 @@ public final class UserDictionary implements Dictionary {
|
|||
public static final short LEFT_ID = 1781;
|
||||
|
||||
// NNG right
|
||||
public static final short RIGHT_ID = 3534;
|
||||
public static final short RIGHT_ID = 3533;
|
||||
// NNG right with hangul and a coda on the last char
|
||||
public static final short RIGHT_ID_T = 3534;
|
||||
public static final short RIGHT_ID_T = 3535;
|
||||
// NNG right with hangul and no coda on the last char
|
||||
public static final short RIGHT_ID_F = 3535;
|
||||
public static final short RIGHT_ID_F = 3534;
|
||||
|
||||
// segment lengths (length, length, ...) indexed by compound ID, or null for a simple noun
|
||||
private final int segmentations[][];
|
||||
|
|
Loading…
Reference in New Issue