diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f1e6ebba8d9..1b0926043ca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -254,6 +254,9 @@ Other: * LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x cannot read. 1.9.0 can read the old format. (Koji Sekiguchi) +* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer + module. (Tomoko Uchida via Uwe Schindler) + ======================= Lucene 7.4.1 ======================= Bug Fixes: diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java index 010abc8d2f0..70bcef6cc41 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java @@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; /** * Factory for {@link KoreanPartOfSpeechStopFilter}. + * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt; + * &lt;analyzer&gt; + * &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt; + * &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory" + * tags="E,J"/&gt; + * &lt;/analyzer&gt; + * &lt;/fieldType&gt; + * </pre> + * + * <p>
+ * Supports the following attributes: + *
+ * <pre class="prettyprint"> + * &lt;fieldType name="text_ko" class="solr.TextField"&gt; + * &lt;analyzer&gt; + * &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt; + * &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt; + * &lt;/analyzer&gt; + * &lt;/fieldType&gt; + * </pre> * @lucene.experimental */ public class KoreanReadingFormFilterFactory extends TokenFilterFactory { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java index 43a02d3a0d6..f2fed275e10 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java @@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoaderAware; +import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode; /** * Factory for {@link KoreanTokenizer}. + * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt; + * &lt;analyzer&gt; + * &lt;tokenizer class="solr.KoreanTokenizerFactory" + * decompoundMode="discard" + * userDictionary="user.txt" + * userDictionaryEncoding="UTF-8" + * outputUnknownUnigrams="false" + * /&gt; + * &lt;/analyzer&gt; + * &lt;/fieldType&gt; + * </pre> + * + * <p>
+ * Supports the following attributes: + *