diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f1e6ebba8d9..1b0926043ca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -254,6 +254,9 @@ Other: * LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x cannot read. 1.9.0 can read the old format. (Koji Sekiguchi) +* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer + module. (Tomoko Uchida via Uwe Schindler) + ======================= Lucene 7.4.1 ======================= Bug Fixes: diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java index 010abc8d2f0..70bcef6cc41 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java @@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; /** * Factory for {@link KoreanPartOfSpeechStopFilter}. + *
+ * <fieldType name="text_ko" class="solr.TextField">
+ *    <analyzer>
+ *      <tokenizer class="solr.KoreanTokenizerFactory"/>
+ *      <filter class="solr.KoreanPartOfSpeechStopFilterFactory"
+ *              tags="E,J"/>
+ *    </analyzer>
+ * </fieldType>
+ * 
+ * + *

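+ * Supports the following attributes: + *

    + *
  • tags: Part-of-speech tags of the tokens to be removed, e.g. "E,J" as in the example above.
  • + *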
* @lucene.experimental */ public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory { private Set stopTags; - /** Creates a new JapanesePartOfSpeechStopFilterFactory */ + /** Creates a new KoreanPartOfSpeechStopFilterFactory */ public KoreanPartOfSpeechStopFilterFactory(Map args) { super(args); Set stopTagStr = getSet(args, "tags"); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java index 860a1393396..ce2779b7bbe 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java @@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; /** * Factory for {@link KoreanReadingFormFilter}. + *
+ * <fieldType name="text_ko" class="solr.TextField">
+ *   <analyzer>
+ *     <tokenizer class="solr.KoreanTokenizerFactory"/>
+ *     <filter class="solr.KoreanReadingFormFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * 
* @lucene.experimental */ public class KoreanReadingFormFilterFactory extends TokenFilterFactory { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java index 43a02d3a0d6..f2fed275e10 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java @@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoaderAware; +import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode; /** * Factory for {@link KoreanTokenizer}. + *
+ * <fieldType name="text_ko" class="solr.TextField">
+ *   <analyzer>
+ *     <tokenizer class="solr.KoreanTokenizerFactory"
+ *                decompoundMode="discard"
+ *                userDictionary="user.txt"
+ *                userDictionaryEncoding="UTF-8"
+ *                outputUnknownUnigrams="false"
+ *     />
+ *   </analyzer>
+ * </fieldType>
+ * 
+ * + *

+ * Supports the following attributes: + *

    + *
  • userDictionary: User dictionary path.
  • + *
  • userDictionaryEncoding: User dictionary encoding.
  • + *
  • decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'. See {@link DecompoundMode}.
  • + *
  • outputUnknownUnigrams: If true, outputs unigrams for unknown words.
  • + *
* @lucene.experimental */ public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {