LUCENE-2894: add schema.xml samples for solr analysis factories (A to H)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1073336 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-02-22 14:17:10 +00:00
parent 43113fe217
commit e5309e652b
36 changed files with 375 additions and 36 deletions

View File

@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ASCIIFoldingFilter} */ /**
* Factory for {@link ASCIIFoldingFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory { public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
public ASCIIFoldingFilter create(TokenStream input) { public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input); return new ASCIIFoldingFilter(input);

View File

@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
/** /**
* Factory for {@link ArabicNormalizationFilter} * Factory for {@link ArabicNormalizationFilter}.
**/ * <pre class="prettyprint" >
* &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{ public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
public ArabicNormalizationFilter create(TokenStream input) { public ArabicNormalizationFilter create(TokenStream input) {

View File

@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter;
/** /**
* Factory for {@link ArabicStemFilter} * Factory for {@link ArabicStemFilter}.
**/ * <pre class="prettyprint" >
* &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ArabicStemFilterFactory extends BaseTokenFilterFactory{ public class ArabicStemFilterFactory extends BaseTokenFilterFactory{

View File

@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter; import org.apache.lucene.analysis.br.BrazilianStemFilter;
/** Factory for {@link BrazilianStemFilter} */ /**
* Factory for {@link BrazilianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory { public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
public BrazilianStemFilter create(TokenStream in) { public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in); return new BrazilianStemFilter(in);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter; import org.apache.lucene.analysis.bg.BulgarianStemFilter;
/** Factory for {@link BulgarianStemFilter} */ /**
* Factory for {@link BulgarianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class BulgarianStemFilterFactory extends BaseTokenFilterFactory { public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input); return new BulgarianStemFilter(input);

View File

@ -22,7 +22,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.cjk.CJKTokenizer; import org.apache.lucene.analysis.cjk.CJKTokenizer;
import java.io.Reader; import java.io.Reader;
/** Factory for {@link CJKTokenizer} */ /**
* Factory for {@link CJKTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.CJKTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class CJKTokenizerFactory extends BaseTokenizerFactory { public class CJKTokenizerFactory extends BaseTokenizerFactory {
public CJKTokenizer create(Reader in) { public CJKTokenizer create(Reader in) {
return new CJKTokenizer(in); return new CJKTokenizer(in);

View File

@ -33,7 +33,7 @@ import java.util.StringTokenizer;
* The factory takes parameters:<br/> * The factory takes parameters:<br/>
* "onlyFirstWord" - should each word be capitalized or all of the words?<br/> * "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
* "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/> * "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
* "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive. * "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.<br/>
* "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/> * "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
* "okPrefix" - do not change word capitalization if a word begins with something in this list. * "okPrefix" - do not change word capitalization if a word begins with something in this list.
* for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to * for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
@ -43,6 +43,16 @@ import java.util.StringTokenizer;
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is * "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
* assumed to be correct.<br/> * assumed to be correct.<br/>
* *
* <pre class="prettyprint" >
* &lt;fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
* keep="java solr lucene" keepIgnoreCase="false"
* okPrefix="McK McD McA"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$ * @version $Id$
* @since solr 1.3 * @since solr 1.3
*/ */

View File

@ -22,6 +22,15 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter; import org.apache.lucene.analysis.standard.ClassicFilter;
/** /**
* Factory for {@link ClassicFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ClassicFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$ * @version $Id$
*/ */
public class ClassicFilterFactory extends BaseTokenFilterFactory { public class ClassicFilterFactory extends BaseTokenFilterFactory {

View File

@ -24,6 +24,14 @@ import java.io.Reader;
import java.util.Map; import java.util.Map;
/** /**
* Factory for {@link ClassicTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$ * @version $Id$
*/ */

View File

@ -56,11 +56,19 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
* <li>strength: 'primary','secondary','tertiary', or 'identical' (optional) * <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
* <li>decomposition: 'no','canonical', or 'full' (optional) * <li>decomposition: 'no','canonical', or 'full' (optional)
* </ul> * </ul>
* *
* <pre class="prettyprint" >
* &lt;fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @see Collator * @see Collator
* @see Locale * @see Locale
* @see RuleBasedCollator * @see RuleBasedCollator
* @since solr 1.5 * @since solr 3.1
*/ */
public class CollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class CollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private Collator collator; private Collator collator;

View File

@ -27,7 +27,15 @@ import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware; import org.apache.solr.util.plugin.ResourceLoaderAware;
/** /**
* Constructs a CommonGramsFilter * Constructs a {@link CommonGramsFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
/* /*

View File

@ -29,10 +29,18 @@ import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware; import org.apache.solr.util.plugin.ResourceLoaderAware;
/** /**
* Construct CommonGramsQueryFilter * Construct {@link CommonGramsQueryFilter}.
* *
* This is pretty close to a straight copy from StopFilterFactory * This is pretty close to a straight copy from {@link StopFilterFactory}.
* *
* <pre class="prettyprint" >
* &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
implements ResourceLoaderAware { implements ResourceLoaderAware {

View File

@ -20,7 +20,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter; import org.apache.lucene.analysis.cz.CzechStemFilter;
/** Factory for {@link CzechStemFilter} */ /**
* Factory for {@link CzechStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CzechStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class CzechStemFilterFactory extends BaseTokenFilterFactory { public class CzechStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new CzechStemFilter(input); return new CzechStemFilter(input);

View File

@ -31,8 +31,17 @@ import java.util.Map;
/** /**
* *
* Factory for {@link DelimitedPayloadTokenFilter} * Factory for {@link DelimitedPayloadTokenFilter}.
**/ * <pre class="prettyprint" >
* &lt;fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*
*/
public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
public static final String ENCODER_ATTR = "encoder"; public static final String ENCODER_ATTR = "encoder";
public static final String DELIMITER_ATTR = "delimiter"; public static final String DELIMITER_ATTR = "delimiter";

View File

@ -28,7 +28,18 @@ import org.apache.lucene.analysis.TokenStream;
import java.util.Map; import java.util.Map;
import java.io.IOException; import java.io.IOException;
/** Factory for {@link DictionaryCompoundWordTokenFilter} */ /**
* Factory for {@link DictionaryCompoundWordTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary; private CharArraySet dictionary;
private String dictFile; private String dictFile;

View File

@ -21,6 +21,17 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
/**
* Factory for {@link DoubleMetaphoneFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory
{ {
public static final String INJECT = "inject"; public static final String INJECT = "inject";

View File

@ -23,6 +23,14 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
/** /**
* Creates new instances of {@link EdgeNGramTokenFilter}. * Creates new instances of {@link EdgeNGramTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
public class EdgeNGramFilterFactory extends BaseTokenFilterFactory { public class EdgeNGramFilterFactory extends BaseTokenFilterFactory {
private int maxGramSize = 0; private int maxGramSize = 0;

View File

@ -24,6 +24,13 @@ import java.util.Map;
/** /**
* Creates new instances of {@link EdgeNGramTokenizer}. * Creates new instances of {@link EdgeNGramTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory { public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory {
private int maxGramSize = 0; private int maxGramSize = 0;

View File

@ -27,7 +27,17 @@ import org.apache.lucene.analysis.util.CharArraySet;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ElisionFilter} */ /**
* Factory for {@link ElisionFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet articles; private CharArraySet articles;

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
/** Factory for {@link EnglishMinimalStemFilter} */ /**
* Factory for {@link EnglishMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EnglishMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory { public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new EnglishMinimalStemFilter(input); return new EnglishMinimalStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
/** Factory for {@link EnglishPossessiveFilter} */ /**
* Factory for {@link EnglishPossessiveFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EnglishPossessiveFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory { public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new EnglishPossessiveFilter(input); return new EnglishPossessiveFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter; import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
/** Factory for {@link FinnishLightStemFilter} */ /**
* Factory for {@link FinnishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory { public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new FinnishLightStemFilter(input); return new FinnishLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter; import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
/** Factory for {@link FrenchLightStemFilter} */ /**
* Factory for {@link FrenchLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory { public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new FrenchLightStemFilter(input); return new FrenchLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter; import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
/** Factory for {@link FrenchMinimalStemFilter} */ /**
* Factory for {@link FrenchMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory { public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new FrenchMinimalStemFilter(input); return new FrenchMinimalStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gl.GalicianStemFilter; import org.apache.lucene.analysis.gl.GalicianStemFilter;
/** Factory for {@link GalicianStemFilter} */ /**
* Factory for {@link GalicianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GalicianStemFilterFactory extends BaseTokenFilterFactory { public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new GalicianStemFilter(input); return new GalicianStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanLightStemFilter; import org.apache.lucene.analysis.de.GermanLightStemFilter;
/** Factory for {@link GermanLightStemFilter} */ /**
* Factory for {@link GermanLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanLightStemFilterFactory extends BaseTokenFilterFactory { public class GermanLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new GermanLightStemFilter(input); return new GermanLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanMinimalStemFilter; import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
/** Factory for {@link GermanMinimalStemFilter} */ /**
* Factory for {@link GermanMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory { public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new GermanMinimalStemFilter(input); return new GermanMinimalStemFilter(input);

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link GermanStemFilter} */ /**
* Factory for {@link GermanStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanStemFilterFactory extends BaseTokenFilterFactory { public class GermanStemFilterFactory extends BaseTokenFilterFactory {
public GermanStemFilter create(TokenStream in) { public GermanStemFilter create(TokenStream in) {
return new GermanStemFilter(in); return new GermanStemFilter(in);

View File

@ -26,7 +26,17 @@ import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
/** Factory for {@link GreekLowerCaseFilter} */ /**
* Factory for {@link GreekLowerCaseFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
{ {

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.el.GreekStemFilter; import org.apache.lucene.analysis.el.GreekStemFilter;
/** Factory for {@link GreekStemFilter} */ /**
* Factory for {@link GreekStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GreekStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GreekStemFilterFactory extends BaseTokenFilterFactory { public class GreekStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {

View File

@ -21,7 +21,18 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory { /**
* Factory for {@link HTMLStripCharFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_html" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.HTMLStripCharFilterFactory"/&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
public HTMLStripCharFilter create(CharStream input) { public HTMLStripCharFilter create(CharStream input) {
return new HTMLStripCharFilter(input); return new HTMLStripCharFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter; import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
/** Factory for {@link HindiNormalizationFilter} */ /**
* Factory for {@link HindiNormalizationFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory { public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input); return new HindiNormalizationFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiStemFilter; import org.apache.lucene.analysis.hi.HindiStemFilter;
/** Factory for {@link HindiStemFilter} */ /**
* Factory for {@link HindiStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HindiStemFilterFactory extends BaseTokenFilterFactory { public class HindiStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new HindiStemFilter(input); return new HindiStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hu.HungarianLightStemFilter; import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
/** Factory for {@link HungarianLightStemFilter} */ /**
* Factory for {@link HungarianLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory { public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) { public TokenStream create(TokenStream input) {
return new HungarianLightStemFilter(input); return new HungarianLightStemFilter(input);

View File

@ -22,7 +22,15 @@ import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.solr.analysis.BaseTokenFilterFactory; import org.apache.solr.analysis.BaseTokenFilterFactory;
/** /**
* Factory for {@link HyphenatedWordsFilter} * Factory for {@link HyphenatedWordsFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/ */
public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory { public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory {
public HyphenatedWordsFilter create(TokenStream input) { public HyphenatedWordsFilter create(TokenStream input) {

View File

@ -33,7 +33,7 @@ import java.io.InputStream;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
/** /**
* Factory for {@link HyphenationCompoundWordTokenFilter} * Factory for {@link HyphenationCompoundWordTokenFilter}.
* <p> * <p>
* This factory accepts the following parameters: * This factory accepts the following parameters:
* <ul> * <ul>
@ -48,6 +48,15 @@ import org.xml.sax.InputSource;
* to the stream. defaults to false. * to the stream. defaults to false.
* </ul> * </ul>
* <p> * <p>
* <pre class="prettyprint" >
* &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
* dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
* @see HyphenationCompoundWordTokenFilter * @see HyphenationCompoundWordTokenFilter
*/ */
public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware { public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {