mirror of https://github.com/apache/lucene.git
LUCENE-2894: add more contribution
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d43ca1d2fd
commit
521fb53da5
|
@ -26,7 +26,18 @@ import org.apache.solr.common.SolrException.ErrorCode;
|
|||
|
||||
import java.util.Map;
|
||||
|
||||
/** Factory for {@link ShingleFilter} */
|
||||
/**
|
||||
* Factory for {@link ShingleFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
|
||||
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ShingleFilterFactory extends BaseTokenFilterFactory {
|
||||
private int minShingleSize;
|
||||
private int maxShingleSize;
|
||||
|
|
|
@ -32,6 +32,13 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* Factory for {@link SnowballFilter}, with configurable language.
|
||||
* <p>
|
||||
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
||||
|
||||
/** Factory for {@link SpanishLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link SpanishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SpanishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SpanishLightStemFilter(input);
|
||||
|
|
|
@ -23,6 +23,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link StandardFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.StandardFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class StandardFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -24,6 +24,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link StandardTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
|
||||
|
|
|
@ -28,7 +28,15 @@ import org.apache.solr.common.util.StrUtils;
|
|||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
/**
|
||||
* Factory for {@link StemmerOverrideFilter}
|
||||
* Factory for {@link StemmerOverrideFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArrayMap<String> dictionary = null;
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
|
||||
|
||||
/** Factory for {@link SwedishLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link SwedishLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SwedishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new SwedishLightStemFilter(input);
|
||||
|
|
|
@ -35,6 +35,15 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link SynonymFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
|
||||
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -22,7 +22,17 @@ import org.apache.lucene.analysis.th.ThaiWordFilter;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link ThaiWordFilter} */
|
||||
/**
|
||||
* Factory for {@link ThaiWordFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ThaiWordFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
|
||||
public ThaiWordFilter create(TokenStream input) {
|
||||
assureMatchVersion();
|
||||
|
|
|
@ -22,7 +22,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link TokenOffsetPayloadTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link TokenOffsetPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
|
||||
return new TokenOffsetPayloadTokenFilter(input);
|
||||
|
|
|
@ -24,6 +24,14 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
|
|||
import org.apache.solr.common.SolrException;
|
||||
|
||||
/**
|
||||
* Factory for {@link TrimFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory"/>
|
||||
* <filter class="solr.TrimFilterFactory" updateOffsets="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
* @see TrimFilter
|
||||
*/
|
||||
|
|
|
@ -20,7 +20,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
||||
|
||||
/** Factory for {@link TurkishLowerCaseFilter} */
|
||||
/**
|
||||
* Factory for {@link TurkishLowerCaseFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new TurkishLowerCaseFilter(input);
|
||||
|
|
|
@ -22,7 +22,17 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link TypeAsPayloadTokenFilter} */
|
||||
/**
|
||||
* Factory for {@link TypeAsPayloadTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.TypeAsPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
public TypeAsPayloadTokenFilter create(TokenStream input) {
|
||||
return new TypeAsPayloadTokenFilter(input);
|
||||
|
|
|
@ -27,6 +27,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link UAX29URLEmailTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*
|
||||
*/
|
||||
|
|
|
@ -23,6 +23,13 @@ import java.io.Reader;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link WhitespaceTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WhitespaceTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -22,7 +22,16 @@ import java.io.Reader;
|
|||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
|
||||
/** Factory for {@link WikipediaTokenizer}*/
|
||||
/**
|
||||
* Factory for {@link WikipediaTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WikipediaTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WikipediaTokenizerFactory extends BaseTokenizerFactory {
|
||||
// TODO: add support for WikipediaTokenizer's advanced options.
|
||||
public Tokenizer create(Reader input) {
|
||||
|
|
|
@ -37,6 +37,17 @@ import java.io.IOException;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link WordDelimiterFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
|
||||
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
|
||||
* catenateWords="0" catenateNumbers="0" catenateAll="0"
|
||||
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
Loading…
Reference in New Issue