LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori

This commit is contained in:
Tomoko Uchida 2022-02-20 21:39:03 +09:00
parent e7a29c4c4c
commit 76c9fd4e38
11 changed files with 7 additions and 4 deletions

View File

@ -202,11 +202,11 @@ Nori Korean Morphological Analyzer - Apache Lucene Integration
This software includes a binary and/or source version of data from This software includes a binary and/or source version of data from
mecab-ko-dic-2.0.3-20170922 mecab-ko-dic-2.1.1-20180720
which can be obtained from which can be obtained from
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
The floating point precision conversion in NumericUtils.Float16Converter is derived from work by The floating point precision conversion in NumericUtils.Float16Converter is derived from work by
Jeroen van der Zijp, granted for use under the Apache license. Jeroen van der Zijp, granted for use under the Apache license.

View File

@ -54,7 +54,7 @@ configure(project(":lucene:analysis:nori")) {
dependsOn deleteDictionaryData dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath dependsOn sourceSets.main.runtimeClasspath
def dictionaryName = "mecab-ko-dic-2.0.3-20170922" def dictionaryName = "mecab-ko-dic-2.1.1-20180720"
def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz" def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz") def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
def unpackedDir = file("${buildDir}/generate/${dictionaryName}") def unpackedDir = file("${buildDir}/generate/${dictionaryName}")

View File

@ -189,6 +189,9 @@ Improvements
* LUCENE-10371: Make IndexRearranger able to arrange segment in a determined order. * LUCENE-10371: Make IndexRearranger able to arrange segment in a determined order.
(Patrick Zhai) (Patrick Zhai)
* LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori.
(Uihyun Kim)
Optimizations Optimizations
--------------------- ---------------------

View File

@ -28,7 +28,7 @@ import java.util.List;
import org.apache.lucene.analysis.ko.dict.CharacterDefinition; import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
class UnknownDictionaryBuilder { class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*"; private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
private String encoding; private String encoding;