LUCENE-2894: add schema.xml samples for solr analysis factories (A to H)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1073336 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-02-22 14:17:10 +00:00
parent 43113fe217
commit e5309e652b
36 changed files with 375 additions and 36 deletions

View File

@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ASCIIFoldingFilter} */
/**
* Factory for {@link ASCIIFoldingFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ASCIIFoldingFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input);

View File

@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
/**
* Factory for {@link ArabicNormalizationFilter}
**/
* Factory for {@link ArabicNormalizationFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
public ArabicNormalizationFilter create(TokenStream input) {

View File

@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter;
/**
* Factory for {@link ArabicStemFilter}
**/
* Factory for {@link ArabicStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ArabicStemFilterFactory extends BaseTokenFilterFactory{

View File

@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
/** Factory for {@link BrazilianStemFilter} */
/**
* Factory for {@link BrazilianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
/** Factory for {@link BulgarianStemFilter} */
/**
* Factory for {@link BulgarianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input);

View File

@ -22,7 +22,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.cjk.CJKTokenizer;
import java.io.Reader;
/** Factory for {@link CJKTokenizer} */
/**
* Factory for {@link CJKTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.CJKTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class CJKTokenizerFactory extends BaseTokenizerFactory {
public CJKTokenizer create(Reader in) {
return new CJKTokenizer(in);

View File

@ -33,7 +33,7 @@ import java.util.StringTokenizer;
* The factory takes parameters:<br/>
* "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
* "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
* "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.
* "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.<br/>
* "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
* "okPrefix" - do not change word capitalization if a word begins with something in this list.
* for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
@ -43,6 +43,16 @@ import java.util.StringTokenizer;
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
* assumed to be correct.<br/>
*
* <pre class="prettyprint" >
* &lt;fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
* keep="java solr lucene" keepIgnoreCase="false"
* okPrefix="McK McD McA"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$
* @since solr 1.3
*/

View File

@ -22,6 +22,15 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter;
/**
* Factory for {@link ClassicFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ClassicFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$
*/
public class ClassicFilterFactory extends BaseTokenFilterFactory {

View File

@ -24,6 +24,14 @@ import java.io.Reader;
import java.util.Map;
/**
* Factory for {@link ClassicTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @version $Id$
*/

View File

@ -56,11 +56,19 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
* <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
* <li>decomposition: 'no','canonical', or 'full' (optional)
* </ul>
*
*
* <pre class="prettyprint" >
* &lt;fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @see Collator
* @see Locale
* @see RuleBasedCollator
* @since solr 1.5
* @since solr 3.1
*/
public class CollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private Collator collator;

View File

@ -27,7 +27,15 @@ import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
* Constructs a CommonGramsFilter
* Constructs a {@link CommonGramsFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
/*

View File

@ -29,10 +29,18 @@ import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
* Construct CommonGramsQueryFilter
* Construct {@link CommonGramsQueryFilter}.
*
* This is pretty close to a straight copy from StopFilterFactory
* This is pretty close to a straight copy from {@link StopFilterFactory}.
*
* <pre class="prettyprint" >
* &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
implements ResourceLoaderAware {

View File

@ -20,7 +20,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter;
/** Factory for {@link CzechStemFilter} */
/**
* Factory for {@link CzechStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.CzechStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class CzechStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new CzechStemFilter(input);

View File

@ -31,8 +31,17 @@ import java.util.Map;
/**
*
* Factory for {@link DelimitedPayloadTokenFilter}
**/
* Factory for {@link DelimitedPayloadTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*
*/
public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
public static final String ENCODER_ATTR = "encoder";
public static final String DELIMITER_ATTR = "delimiter";

View File

@ -28,7 +28,18 @@ import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
import java.io.IOException;
/** Factory for {@link DictionaryCompoundWordTokenFilter} */
/**
* Factory for {@link DictionaryCompoundWordTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
private String dictFile;

View File

@ -21,6 +21,17 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
/**
* Factory for {@link DoubleMetaphoneFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory
{
public static final String INJECT = "inject";

View File

@ -23,6 +23,14 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EdgeNGramFilterFactory extends BaseTokenFilterFactory {
private int maxGramSize = 0;

View File

@ -24,6 +24,13 @@ import java.util.Map;
/**
* Creates new instances of {@link EdgeNGramTokenizer}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory {
private int maxGramSize = 0;

View File

@ -27,7 +27,17 @@ import org.apache.lucene.analysis.util.CharArraySet;
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link ElisionFilter} */
/**
* Factory for {@link ElisionFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet articles;

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
/** Factory for {@link EnglishMinimalStemFilter} */
/**
* Factory for {@link EnglishMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EnglishMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new EnglishMinimalStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
/** Factory for {@link EnglishPossessiveFilter} */
/**
* Factory for {@link EnglishPossessiveFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.EnglishPossessiveFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new EnglishPossessiveFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
/** Factory for {@link FinnishLightStemFilter} */
/**
* Factory for {@link FinnishLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FinnishLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
/** Factory for {@link FrenchLightStemFilter} */
/**
* Factory for {@link FrenchLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FrenchLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
/** Factory for {@link FrenchMinimalStemFilter} */
/**
* Factory for {@link FrenchMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FrenchMinimalStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gl.GalicianStemFilter;
/** Factory for {@link GalicianStemFilter} */
/**
* Factory for {@link GalicianStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GalicianStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanLightStemFilter;
/** Factory for {@link GermanLightStemFilter} */
/**
* Factory for {@link GermanLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GermanLightStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
/** Factory for {@link GermanMinimalStemFilter} */
/**
* Factory for {@link GermanMinimalStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanMinimalStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GermanMinimalStemFilter(input);

View File

@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.TokenStream;
/** Factory for {@link GermanStemFilter} */
/**
* Factory for {@link GermanStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GermanStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
public GermanStemFilter create(TokenStream in) {
return new GermanStemFilter(in);

View File

@ -26,7 +26,17 @@ import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
/** Factory for {@link GreekLowerCaseFilter} */
/**
* Factory for {@link GreekLowerCaseFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
{

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.el.GreekStemFilter;
/** Factory for {@link GreekStemFilter} */
/**
* Factory for {@link GreekStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.GreekStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class GreekStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {

View File

@ -21,7 +21,18 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
/**
* Factory for {@link HTMLStripCharFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_html" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;charFilter class="solr.HTMLStripCharFilterFactory"/&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
public HTMLStripCharFilter create(CharStream input) {
return new HTMLStripCharFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
/** Factory for {@link HindiNormalizationFilter} */
/**
* Factory for {@link HindiNormalizationFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiStemFilter;
/** Factory for {@link HindiStemFilter} */
/**
* Factory for {@link HindiStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HindiStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HindiStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiStemFilter(input);

View File

@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
/** Factory for {@link HungarianLightStemFilter} */
/**
* Factory for {@link HungarianLightStemFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HungarianLightStemFilter(input);

View File

@ -22,7 +22,15 @@ import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.solr.analysis.BaseTokenFilterFactory;
/**
* Factory for {@link HyphenatedWordsFilter}
* Factory for {@link HyphenatedWordsFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
*/
public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory {
public HyphenatedWordsFilter create(TokenStream input) {

View File

@ -33,7 +33,7 @@ import java.io.InputStream;
import org.xml.sax.InputSource;
/**
* Factory for {@link HyphenationCompoundWordTokenFilter}
* Factory for {@link HyphenationCompoundWordTokenFilter}.
* <p>
* This factory accepts the following parameters:
* <ul>
@ -48,6 +48,15 @@ import org.xml.sax.InputSource;
* to the stream. defaults to false.
* </ul>
* <p>
* <pre class="prettyprint" >
* &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
* dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @version $Id$
* @see HyphenationCompoundWordTokenFilter
*/
public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {