LUCENE-8784: Restore the Korean part-of-speech tag for NGRAM.

The part-of-speech tag for unigram was changed inadvertently in a previous commit (not released). This change restores the original value, which is also the one set in the serialized unknown dictionary.
2019-05-28 12:00:27 +02:00 · 2019-05-28 12:00:27 +02:00 · bf0d6fad42
parent 04d781db8b
commit bf0d6fad42
1 changed files with 1 additions and 1 deletions
--- a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java
@@ -32,7 +32,7 @@ import java.util.List;
 import org.apache.lucene.analysis.ko.dict.CharacterDefinition;

 public class UnknownDictionaryBuilder {
-  private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,UNKNOWN,*,*,*,*,*,*,*";
+  private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*";
  
  private String encoding = "utf-8";