mirror of https://github.com/apache/lucene.git
Merge branch 'jira/lucene-8453' of https://github.com/mocobeta/lucene-solr-mirror
LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module. This closes #434
This commit is contained in:
commit
e9addea087
|
@ -254,6 +254,9 @@ Other:
|
||||||
* LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
|
* LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
|
||||||
cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
|
cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
|
||||||
|
|
||||||
|
* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer
|
||||||
|
module. (Tomoko Uchida via Uwe Schindler)
|
||||||
|
|
||||||
======================= Lucene 7.4.1 =======================
|
======================= Lucene 7.4.1 =======================
|
||||||
|
|
||||||
Bug Fixes:
|
Bug Fixes:
|
||||||
|
|
|
@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link KoreanPartOfSpeechStopFilter}.
|
* Factory for {@link KoreanPartOfSpeechStopFilter}.
|
||||||
|
* <pre class="prettyprint">
|
||||||
|
* <fieldType name="text_ko" class="solr.TextField">
|
||||||
|
* <analyzer>
|
||||||
|
* <tokenizer class="solr.KoreanTokenizerFactory"/>
|
||||||
|
* <filter class="solr.KoreanPartOfSpeechStopFilterFactory"
|
||||||
|
* tags="E,J"/>
|
||||||
|
* </analyzer>
|
||||||
|
* </fieldType>
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Supports the following attributes:
|
||||||
|
* <ul>
|
||||||
|
* <li>tags: List of stop tags. If not specified, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used.</li>
|
||||||
|
* </ul>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
|
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
|
||||||
private Set<POS.Tag> stopTags;
|
private Set<POS.Tag> stopTags;
|
||||||
|
|
||||||
/** Creates a new JapanesePartOfSpeechStopFilterFactory */
|
/** Creates a new KoreanPartOfSpeechStopFilterFactory */
|
||||||
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
|
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
|
||||||
super(args);
|
super(args);
|
||||||
Set<String> stopTagStr = getSet(args, "tags");
|
Set<String> stopTagStr = getSet(args, "tags");
|
||||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link KoreanReadingFormFilter}.
|
* Factory for {@link KoreanReadingFormFilter}.
|
||||||
|
* <pre class="prettyprint">
|
||||||
|
* <fieldType name="text_ko" class="solr.TextField">
|
||||||
|
* <analyzer>
|
||||||
|
* <tokenizer class="solr.KoreanTokenizerFactory"/>
|
||||||
|
* <filter class="solr.KoreanReadingFormFilterFactory"/>
|
||||||
|
* </analyzer>
|
||||||
|
* </fieldType>
|
||||||
|
* </pre>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
|
public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
|
||||||
|
|
|
@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||||
|
import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link KoreanTokenizer}.
|
* Factory for {@link KoreanTokenizer}.
|
||||||
|
* <pre class="prettyprint">
|
||||||
|
* <fieldType name="text_ko" class="solr.TextField">
|
||||||
|
* <analyzer>
|
||||||
|
* <tokenizer class="solr.KoreanTokenizerFactory"
|
||||||
|
* decompoundMode="discard"
|
||||||
|
* userDictionary="user.txt"
|
||||||
|
* userDictionaryEncoding="UTF-8"
|
||||||
|
* outputUnknownUnigrams="false"
|
||||||
|
* />
|
||||||
|
* </analyzer>
|
||||||
|
* </fieldType>
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Supports the following attributes:
|
||||||
|
* <ul>
|
||||||
|
* <li>userDictionary: User dictionary path.</li>
|
||||||
|
* <li>userDictionaryEncoding: User dictionary encoding.</li>
|
||||||
|
* <li>decompoundMode: Decompound mode. One of 'none', 'discard', 'mixed'. Default is 'discard'. See {@link DecompoundMode}.</li>
|
||||||
|
* <li>outputUnknownUnigrams: If true, outputs unigrams for unknown words.</li>
|
||||||
|
* </ul>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
|
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
|
||||||
|
|
Loading…
Reference in New Issue