mirror of https://github.com/apache/lucene.git
LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori
This commit is contained in:
parent
e7a29c4c4c
commit
76c9fd4e38
|
@ -202,11 +202,11 @@ Nori Korean Morphological Analyzer - Apache Lucene Integration
|
|||
|
||||
This software includes a binary and/or source version of data from
|
||||
|
||||
mecab-ko-dic-2.0.3-20170922
|
||||
mecab-ko-dic-2.1.1-20180720
|
||||
|
||||
which can be obtained from
|
||||
|
||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz
|
||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
|
||||
|
||||
The floating point precision conversion in NumericUtils.Float16Converter is derived from work by
|
||||
Jeroen van der Zijp, granted for use under the Apache license.
|
||||
|
|
|
@ -54,7 +54,7 @@ configure(project(":lucene:analysis:nori")) {
|
|||
dependsOn deleteDictionaryData
|
||||
dependsOn sourceSets.main.runtimeClasspath
|
||||
|
||||
def dictionaryName = "mecab-ko-dic-2.0.3-20170922"
|
||||
def dictionaryName = "mecab-ko-dic-2.1.1-20180720"
|
||||
def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
|
||||
def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
|
||||
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
|
||||
|
|
|
@ -189,6 +189,9 @@ Improvements
|
|||
* LUCENE-10371: Make IndexRearranger able to arrange segments in a determined order.
|
||||
(Patrick Zhai)
|
||||
|
||||
* LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori.
|
||||
(Uihyun Kim)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
|||
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
|
||||
|
||||
class UnknownDictionaryBuilder {
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*";
|
||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
|
||||
|
||||
private String encoding;
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue