mirror of https://github.com/apache/lucene.git
LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori
This commit is contained in:
parent
e3a7d279b0
commit
b2b3596466
|
@ -202,11 +202,11 @@ Nori Korean Morphological Analyzer - Apache Lucene Integration
|
|||
|
||||
This software includes a binary and/or source version of data from
|
||||
|
||||
mecab-ko-dic-2.0.3-20170922
|
||||
mecab-ko-dic-2.1.1-20180720
|
||||
|
||||
which can be obtained from
|
||||
|
||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz
|
||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
|
||||
|
||||
The floating point precision conversion in NumericUtils.Float16Converter is derived from work by
|
||||
Jeroen van der Zijp, granted for use under the Apache license.
|
||||
|
|
|
@ -54,7 +54,7 @@ configure(project(":lucene:analysis:nori")) {
|
|||
dependsOn deleteDictionaryData
|
||||
dependsOn sourceSets.main.runtimeClasspath
|
||||
|
||||
def dictionaryName = "mecab-ko-dic-2.0.3-20170922"
|
||||
def dictionaryName = "mecab-ko-dic-2.1.1-20180720"
|
||||
def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
|
||||
def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
|
||||
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
|
||||
|
|
|
@ -148,6 +148,9 @@ Improvements
|
|||
|
||||
* LUCENE-10371: Make IndexRearranger able to arrange segments in a determined order.
|
||||
(Patrick Zhai)
|
||||
|
||||
* LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori.
|
||||
(Uihyun Kim)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
|||
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
|
||||
|
||||
class UnknownDictionaryBuilder {
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*";
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
|
||||
|
||||
private String encoding;
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue