mirror of https://github.com/apache/lucene.git
LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori
This commit is contained in:
parent
e7a29c4c4c
commit
76c9fd4e38
|
@ -202,11 +202,11 @@ Nori Korean Morphological Analyzer - Apache Lucene Integration
|
||||||
|
|
||||||
This software includes a binary and/or source version of data from
|
This software includes a binary and/or source version of data from
|
||||||
|
|
||||||
mecab-ko-dic-2.0.3-20170922
|
mecab-ko-dic-2.1.1-20180720
|
||||||
|
|
||||||
which can be obtained from
|
which can be obtained from
|
||||||
|
|
||||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz
|
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
|
||||||
|
|
||||||
The floating point precision conversion in NumericUtils.Float16Converter is derived from work by
|
The floating point precision conversion in NumericUtils.Float16Converter is derived from work by
|
||||||
Jeroen van der Zijp, granted for use under the Apache license.
|
Jeroen van der Zijp, granted for use under the Apache license.
|
||||||
|
|
|
@ -54,7 +54,7 @@ configure(project(":lucene:analysis:nori")) {
|
||||||
dependsOn deleteDictionaryData
|
dependsOn deleteDictionaryData
|
||||||
dependsOn sourceSets.main.runtimeClasspath
|
dependsOn sourceSets.main.runtimeClasspath
|
||||||
|
|
||||||
def dictionaryName = "mecab-ko-dic-2.0.3-20170922"
|
def dictionaryName = "mecab-ko-dic-2.1.1-20180720"
|
||||||
def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
|
def dictionarySource = "https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/${dictionaryName}.tar.gz"
|
||||||
def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
|
def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
|
||||||
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
|
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
|
||||||
|
|
|
@ -189,6 +189,9 @@ Improvements
|
||||||
* LUCENE-10371: Make IndexRearranger able to arrange segment in a determined order.
|
* LUCENE-10371: Make IndexRearranger able to arrange segment in a determined order.
|
||||||
(Patrick Zhai)
|
(Patrick Zhai)
|
||||||
|
|
||||||
|
* LUCENE-10416: Update Korean Dictionary to mecab-ko-dic-2.1.1-20180720 for Nori.
|
||||||
|
(Uihyun Kim)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
||||||
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
|
import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
|
||||||
|
|
||||||
class UnknownDictionaryBuilder {
|
class UnknownDictionaryBuilder {
|
||||||
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*";
|
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1801,3559,3677,SY,*,*,*,*,*,*,*";
|
||||||
|
|
||||||
private String encoding;
|
private String encoding;
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue