LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module
This closes #434
This commit is contained in:
Uwe Schindler 2018-08-11 12:50:19 +02:00
commit e9addea087
4 changed files with 49 additions and 1 deletion

View File

@ -254,6 +254,9 @@ Other:
* LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x * LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
cannot read. 1.9.0 can read the old format. (Koji Sekiguchi) cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer
module. (Tomoko Uchida via Uwe Schindler)
======================= Lucene 7.4.1 ======================= ======================= Lucene 7.4.1 =======================
Bug Fixes: Bug Fixes:

View File

@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
* Factory for {@link KoreanPartOfSpeechStopFilter}. * Factory for {@link KoreanPartOfSpeechStopFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
* &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory"
* tags="E,J"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
*
* <p>
* Supports the following attributes:
* <ul>
* <li>tags: List of stop tags. If not specified, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used.</li>
* </ul>
* @lucene.experimental * @lucene.experimental
*/ */
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory { public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
private Set<POS.Tag> stopTags; private Set<POS.Tag> stopTags;
/** Creates a new JapanesePartOfSpeechStopFilterFactory */ /** Creates a new KoreanPartOfSpeechStopFilterFactory */
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) { public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
super(args); super(args);
Set<String> stopTagStr = getSet(args, "tags"); Set<String> stopTagStr = getSet(args, "tags");

View File

@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/** /**
* Factory for {@link KoreanReadingFormFilter}. * Factory for {@link KoreanReadingFormFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
* &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
* @lucene.experimental * @lucene.experimental
*/ */
public class KoreanReadingFormFilterFactory extends TokenFilterFactory { public class KoreanReadingFormFilterFactory extends TokenFilterFactory {

View File

@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
/** /**
* Factory for {@link KoreanTokenizer}. * Factory for {@link KoreanTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"
* decompoundMode="discard"
* userDictionary="user.txt"
* userDictionaryEncoding="UTF-8"
* outputUnknownUnigrams="false"
* /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
*
* <p>
* Supports the following attributes:
* <ul>
* <li>userDictionary: User dictionary path.</li>
* <li>userDictionaryEncoding: User dictionary encoding.</li>
* <li>decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'. See {@link DecompoundMode}.</li>
* <li>outputUnknownUnigrams: If true, outputs unigrams for unknown words.</li>
* </ul>
* @lucene.experimental * @lucene.experimental
*/ */
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware { public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {