LUCENE-2894: add more contribution

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-02-24 01:40:58 +00:00
parent d43ca1d2fd
commit 521fb53da5
17 changed files with 161 additions and 9 deletions

View File

@ -26,7 +26,18 @@ import org.apache.solr.common.SolrException.ErrorCode;
import java.util.Map; import java.util.Map;
/** Factory for {@link ShingleFilter} */ /**
* Factory for {@link ShingleFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" "/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ShingleFilterFactory extends BaseTokenFilterFactory { public class ShingleFilterFactory extends BaseTokenFilterFactory {
private int minShingleSize; private int minShingleSize;
private int maxShingleSize; private int maxShingleSize;

View File

@ -32,6 +32,13 @@ import org.tartarus.snowball.SnowballProgram;
* Factory for {@link SnowballFilter}, with configurable language * Factory for {@link SnowballFilter}, with configurable language
* <p> * <p>
* Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection. * Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
* <pre class="prettyprint" >
* &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* *
* @version $Id$ * @version $Id$
*/ */

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.es.SpanishLightStemFilter; import org.apache.lucene.analysis.es.SpanishLightStemFilter;
/** Factory for {@link SpanishLightStemFilter} */ /**
* Factory for {@link SpanishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory { public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new SpanishLightStemFilter(input); return new SpanishLightStemFilter(input);

View File

@ -23,6 +23,14 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardFilter;
/** /**
* Factory for {@link StandardFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StandardFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
*/ */
public class StandardFilterFactory extends BaseTokenFilterFactory { public class StandardFilterFactory extends BaseTokenFilterFactory {

View File

@ -24,6 +24,13 @@ import java.io.Reader;
import java.util.Map; import java.util.Map;
/** /**
* Factory for {@link StandardTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
*/ */

View File

@ -28,7 +28,15 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware; import org.apache.solr.util.plugin.ResourceLoaderAware;
/** /**
* Factory for {@link StemmerOverrideFilter} * Factory for {@link StemmerOverrideFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class StemmerOverrideFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArrayMap<String> dictionary = null; private CharArrayMap<String> dictionary = null;

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter; import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
/** Factory for {@link SwedishLightStemFilter} */ /**
* Factory for {@link SwedishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory { public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new SwedishLightStemFilter(input); return new SwedishLightStemFilter(input);

View File

@ -35,6 +35,15 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Factory for {@link SynonymFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
*/ */
public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {

View File

@ -22,7 +22,17 @@ import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ThaiWordFilter} */ /**
* Factory for {@link ThaiWordFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ThaiWordFilterFactory extends BaseTokenFilterFactory { public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
public ThaiWordFilter create(TokenStream input) { public ThaiWordFilter create(TokenStream input) {
assureMatchVersion(); assureMatchVersion();

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter; import org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link TokenOffsetPayloadTokenFilter} */ /**
* Factory for {@link TokenOffsetPayloadTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory { public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TokenOffsetPayloadTokenFilter create(TokenStream input) { public TokenOffsetPayloadTokenFilter create(TokenStream input) {
return new TokenOffsetPayloadTokenFilter(input); return new TokenOffsetPayloadTokenFilter(input);

View File

@ -24,6 +24,14 @@ import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
/** /**
* Factory for {@link TrimFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.NGramTokenizerFactory"/&gt;
* &lt;filter class="solr.TrimFilterFactory" updateOffsets="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
* @see TrimFilter * @see TrimFilter
*/ */

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
/** Factory for {@link TurkishLowerCaseFilter} */ /**
* Factory for {@link TurkishLowerCaseFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory { public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input); return new TurkishLowerCaseFilter(input);

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter; import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link TypeAsPayloadTokenFilter} */ /**
* Factory for {@link TypeAsPayloadTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory { public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TypeAsPayloadTokenFilter create(TokenStream input) { public TypeAsPayloadTokenFilter create(TokenStream input) {
return new TypeAsPayloadTokenFilter(input); return new TypeAsPayloadTokenFilter(input);

View File

@ -27,6 +27,13 @@ import java.io.Reader;
import java.util.Map; import java.util.Map;
/** /**
* Factory for {@link UAX29URLEmailTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
* *
*/ */

View File

@ -23,6 +23,13 @@ import java.io.Reader;
import java.util.Map; import java.util.Map;
/** /**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
*/ */
public class WhitespaceTokenizerFactory extends BaseTokenizerFactory { public class WhitespaceTokenizerFactory extends BaseTokenizerFactory {

View File

@ -22,7 +22,16 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer; import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
/** Factory for {@link WikipediaTokenizer}*/ /**
* Factory for {@link WikipediaTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WikipediaTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class WikipediaTokenizerFactory extends BaseTokenizerFactory { public class WikipediaTokenizerFactory extends BaseTokenizerFactory {
// TODO: add support for WikipediaTokenizer's advanced options. // TODO: add support for WikipediaTokenizer's advanced options.
public Tokenizer create(Reader input) { public Tokenizer create(Reader input) {

View File

@ -37,6 +37,17 @@ import java.io.IOException;
/** /**
* Factory for {@link WordDelimiterFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
* catenateWords="0" catenateNumbers="0" catenateAll="0"
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$ * @version $Id$
*/ */
public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class WordDelimiterFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {