mirror of https://github.com/apache/lucene.git
Add example schema settings for Korean analyzer.
This commit is contained in:
parent
93926e9c83
commit
82cf667b25
|
@@ -25,12 +25,21 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link KoreanPartOfSpeechStopFilter}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ko" class="solr.TextField">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KoreanTokenizerFactory"/>
|
||||
* <filter class="solr.KoreanPartOfSpeechStopFilterFactory"
|
||||
* tags="E,J"/>
|
||||
* </analyzer>
|
||||
* </fieldType>
|
||||
* </pre>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
|
||||
private Set<POS.Tag> stopTags;
|
||||
|
||||
/** Creates a new JapanesePartOfSpeechStopFilterFactory */
|
||||
/** Creates a new KoreanPartOfSpeechStopFilterFactory */
|
||||
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
Set<String> stopTagStr = getSet(args, "tags");
|
||||
|
|
|
@@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Factory for {@link KoreanReadingFormFilter}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ko" class="solr.TextField">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KoreanTokenizerFactory"/>
|
||||
* <filter class="solr.KoreanReadingFormFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType>
|
||||
* </pre>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
|
||||
|
|
|
@@ -35,6 +35,18 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
|||
|
||||
/**
|
||||
* Factory for {@link KoreanTokenizer}.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="text_ko" class="solr.TextField">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KoreanTokenizerFactory"
|
||||
* decompoundMode="DISCARD"
|
||||
* userDictionary="user.txt"
|
||||
* userDictionaryEncoding="UTF-8"
|
||||
* outputUnknownUnigrams="true"
|
||||
* />
|
||||
* </analyzer>
|
||||
* </fieldType>
|
||||
* </pre>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
|
||||
|
|
Loading…
Reference in New Issue