Merge branch 'jira/lucene-8453' of https://github.com/mocobeta/lucene-solr-mirror

LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module This closes #434
2018-08-11 12:50:19 +02:00 · 2018-08-11 12:50:19 +02:00 · e9addea087
parent cdc0959afc f64f243ef0
commit e9addea087
4 changed files with 49 additions and 1 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -254,6 +254,9 @@ Other:
 * LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
  cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
 * LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer
  module.  (Tomoko Uchida via Uwe Schindler)
 ======================= Lucene 7.4.1 =======================
 Bug Fixes:
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 /**
 * Factory for {@link KoreanPartOfSpeechStopFilter}.
 * <pre class="prettyprint">
 * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
 *    &lt;analyzer&gt;
 *      &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
 *      &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory"
 *              tags="E,J"/&gt;
 *    &lt;/analyzer&gt;
 * &lt;/fieldType&gt;
 * </pre>
 *
 * <p>
 * Supports the following attributes:
 * <ul>
 *   <li>tags: List of stop tags. If not specified, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used.</li>
 * </ul>
 * @lucene.experimental
 */
 public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
  private Set<POS.Tag> stopTags;
-  /** Creates a new JapanesePartOfSpeechStopFilterFactory */
+  /** Creates a new KoreanPartOfSpeechStopFilterFactory */
  public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
    super(args);
    Set<String> stopTagStr = getSet(args, "tags");
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 /**
 * Factory for {@link KoreanReadingFormFilter}.
 * <pre class="prettyprint">
 * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
 *   &lt;analyzer&gt;
 *     &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
 *     &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;
 * </pre>
 * @lucene.experimental
 */
 public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
 /**
 * Factory for {@link KoreanTokenizer}.
 * <pre class="prettyprint">
 * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
 *   &lt;analyzer&gt;
 *     &lt;tokenizer class="solr.KoreanTokenizerFactory"
 *                decompoundMode="discard"
 *                userDictionary="user.txt"
 *                userDictionaryEncoding="UTF-8"
 *                outputUnknownUnigrams="false"
 *     /&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;
 * </pre>
 *
 * <p>
 * Supports the following attributes:
 * <ul>
 *   <li>userDictionary: User dictionary path.</li>
 *   <li>userDictionaryEncoding: User dictionary encoding.</li>
 *   <li>decompoundMode: Decompound mode. One of 'none', 'discard', 'mixed'. Default is 'discard'. See {@link DecompoundMode}.</li>
 *   <li>outputUnknownUnigrams: If true, outputs unigrams for unknown words.</li>
 * </ul>
 * @lucene.experimental
 */
 public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {