LUCENE-2894: add more contribution

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-02-24 01:40:58 +00:00
parent d43ca1d2fd
commit 521fb53da5
17 changed files with 161 additions and 9 deletions

View File

@ -26,7 +26,18 @@ import org.apache.solr.common.SolrException.ErrorCode;
import java.util.Map;
/** Factory for {@link ShingleFilter} */
/**
* Factory for {@link ShingleFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ShingleFilterFactory extends BaseTokenFilterFactory {
private int minShingleSize;
private int maxShingleSize;

View File

@ -32,6 +32,13 @@ import org.tartarus.snowball.SnowballProgram;
* Factory for {@link SnowballFilter}, with configurable language
* <p>
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
* <pre class="prettyprint" >
* &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$
*/

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.es.SpanishLightStemFilter;
/** Factory for {@link SpanishLightStemFilter} */
/**
* Factory for {@link SpanishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new SpanishLightStemFilter(input);

View File

@ -23,6 +23,14 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
/**
* Factory for {@link StandardFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StandardFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class StandardFilterFactory extends BaseTokenFilterFactory {

View File

@ -24,6 +24,13 @@ import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link StandardTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/

View File

@ -28,7 +28,15 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
* Factory for {@link StemmerOverrideFilter}
* Factory for {@link StemmerOverrideFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id
*/
public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArrayMap<String> dictionary = null;

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
/** Factory for {@link SwedishLightStemFilter} */
/**
* Factory for {@link SwedishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new SwedishLightStemFilter(input);

View File

@ -35,6 +35,15 @@ import java.util.List;
import java.util.Map;
/**
* Factory for {@link SynonymFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {

View File

@ -22,7 +22,17 @@ import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ThaiWordFilter} */
/**
* Factory for {@link ThaiWordFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.NGramTokenizerFactory"/&gt;
* &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
public ThaiWordFilter create(TokenStream input) {
assureMatchVersion();

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link TokenOffsetPayloadTokenFilter} */
/**
* Factory for {@link TokenOffsetPayloadTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
return new TokenOffsetPayloadTokenFilter(input);

View File

@ -24,6 +24,14 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.solr.common.SolrException;
/**
* Factory for {@link TrimFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.NGramTokenizerFactory"/&gt;
* &lt;filter class="solr.TrimFilterFactory" updateOffsets="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
* @see TrimFilter
*/

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
/** Factory for {@link TurkishLowerCaseFilter} */
/**
* Factory for {@link TurkishLowerCaseFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link TypeAsPayloadTokenFilter} */
/**
* Factory for {@link TypeAsPayloadTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TypeAsPayloadTokenFilter create(TokenStream input) {
return new TypeAsPayloadTokenFilter(input);

View File

@ -27,6 +27,13 @@ import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link UAX29URLEmailTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*
*/

View File

@ -23,6 +23,13 @@ import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class WhitespaceTokenizerFactory extends BaseTokenizerFactory {

View File

@ -22,7 +22,16 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
/** Factory for {@link WikipediaTokenizer}*/
/**
* Factory for {@link WikipediaTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WikipediaTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class WikipediaTokenizerFactory extends BaseTokenizerFactory {
// TODO: add support for WikipediaTokenizer's advanced options.
public Tokenizer create(Reader input) {

View File

@ -37,6 +37,17 @@ import java.io.IOException;
/**
* Factory for {@link WordDelimiterFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
* catenateWords="0" catenateNumbers="0" catenateAll="0"
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {