Add example schema settings for Korean analyzer.

This commit is contained in:
Tomoko Uchida 2018-08-10 21:52:57 +09:00
parent 93926e9c83
commit 82cf667b25
3 changed files with 30 additions and 1 deletion

View File

@@ -25,12 +25,21 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link KoreanPartOfSpeechStopFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
* &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory"
* tags="E,J"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
* @lucene.experimental
*/
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
private Set<POS.Tag> stopTags;
/** Creates a new KoreanPartOfSpeechStopFilterFactory */
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
super(args);
Set<String> stopTagStr = getSet(args, "tags");

View File

@@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link KoreanReadingFormFilter}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
* &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
* @lucene.experimental
*/
public class KoreanReadingFormFilterFactory extends TokenFilterFactory {

View File

@@ -35,6 +35,18 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
/**
* Factory for {@link KoreanTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ko" class="solr.TextField"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KoreanTokenizerFactory"
* decompoundMode="DISCARD"
* userDictionary="user.txt"
* userDictionaryEncoding="UTF-8"
* outputUnknownUnigrams="true"
* /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
* @lucene.experimental
*/
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {