mirror of https://github.com/apache/lucene.git
SOLR-12956: Add Javadoc @since tag to Analyzer component classes
This commit is contained in:
parent
2d95b740db
commit
c07df19666
|
@ -47,6 +47,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
* <li>Arabic stop words file: a set of default Arabic stop words.
|
||||
* </ul>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ArabicAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.ArabicStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ArabicStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
* This analyzer implements light-stemming as specified by: <i> Searching
|
||||
* Strategies for the Bulgarian Language </i>
|
||||
* http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
|
||||
/**
|
||||
* Analyzer for Bengali.
|
||||
*
|
||||
* @since 7.1.0
|
||||
*/
|
||||
public final class BengaliAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -44,6 +44,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
|
||||
* dependent settings as {@link StandardAnalyzer}.</p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
|
||||
/** File containing default Brazilian Portuguese stopwords. */
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.BrazilianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class BrazilianStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.tartarus.snowball.ext.CatalanStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Catalan.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -33,6 +33,8 @@ import java.util.regex.Pattern;
|
|||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class HTMLStripCharFilterFactory extends CharFilterFactory {
|
||||
final Set<String> escapedTags;
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
* normalizes content with {@link CJKWidthFilter}, folds case with
|
||||
* {@link LowerCaseFilter}, forms bigrams of CJK with {@link CJKBigramFilter},
|
||||
* and filters stopwords with {@link StopFilter}
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class CJKAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Sorani Kurdish.
|
||||
*
|
||||
* @since 4.10.0
|
||||
*/
|
||||
public final class SoraniAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
// TODO: shared base class for Stop/Keep/CommonGrams?
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
|
|||
* <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class CommonGramsQueryFilterFactory extends CommonGramsFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private CharArraySet dictionary;
|
||||
|
|
|
@ -22,6 +22,8 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
/**
|
||||
* "Tokenizes" the entire stream as a single token. This is useful
|
||||
* for data like zip codes, ids, and some product names.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class KeywordAnalyzer extends Analyzer {
|
||||
public KeywordAnalyzer() {
|
||||
|
|
|
@ -38,6 +38,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
|
|||
* MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rare to need to change this
|
||||
* else {@link KeywordTokenizer}::DEFAULT_BUFFER_SIZE</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class KeywordTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLen;
|
||||
|
|
|
@ -40,6 +40,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
|
|||
* It is rare to need to change this
|
||||
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class LetterTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLen;
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
|
||||
/** An {@link Analyzer} that filters {@link LetterTokenizer}
|
||||
* with {@link LowerCaseFilter}
|
||||
*
|
||||
* @since 3.1
|
||||
**/
|
||||
public final class SimpleAnalyzer extends Analyzer {
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.analysis.WordlistLoader;
|
|||
|
||||
/**
|
||||
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class StopAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -69,6 +69,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* for details.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String FORMAT_WORDSET = "wordset";
|
||||
|
|
|
@ -21,6 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
|
||||
/**
|
||||
* An Analyzer that uses {@link UnicodeWhitespaceTokenizer}.
|
||||
*
|
||||
* @since 5.4.0
|
||||
**/
|
||||
public final class UnicodeWhitespaceAnalyzer extends Analyzer {
|
||||
|
||||
|
|
|
@ -21,6 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
|
||||
/**
|
||||
* An Analyzer that uses {@link WhitespaceTokenizer}.
|
||||
*
|
||||
* @since 3.1
|
||||
**/
|
||||
public final class WhitespaceAnalyzer extends Analyzer {
|
||||
|
||||
|
|
|
@ -45,6 +45,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
|
|||
* It is rare to need to change this
|
||||
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class WhitespaceTokenizerFactory extends TokenizerFactory {
|
||||
public static final String RULE_JAVA = "java";
|
||||
|
|
|
@ -89,6 +89,8 @@ import static org.apache.lucene.analysis.util.AnalysisSPILoader.newFactoryClassI
|
|||
* .endwhen()
|
||||
* .build();
|
||||
* </pre>
|
||||
*
|
||||
* @since 5.0.0
|
||||
*/
|
||||
public final class CustomAnalyzer extends Analyzer {
|
||||
|
||||
|
|
|
@ -40,6 +40,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
* all). A default set of stopwords is used unless an alternative list is
|
||||
* specified.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class CzechAnalyzer extends StopwordAnalyzerBase {
|
||||
/** File containing default Czech stopwords. */
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.DanishStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Danish.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class DanishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -48,6 +48,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
|
||||
* dependent settings as {@link StandardAnalyzer}.</p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class GermanAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.GermanStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class GermanStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
|
||||
* dependent settings as {@link StandardAnalyzer}.</p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class GreekAnalyzer extends StopwordAnalyzerBase {
|
||||
/** File containing default Greek stopwords. */
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for English.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class EnglishAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PorterStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class PorterStemFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Spanish.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class SpanishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.BasqueStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Basque.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class BasqueAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
* zero-width non-joiner in addition to whitespace. Some Persian-specific variant forms (such as Farsi
|
||||
* yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class PersianAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
|||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.PersianNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.FinnishStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Finnish.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class FinnishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -49,6 +49,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
*
|
||||
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
|
||||
* dependent settings as {@link StandardAnalyzer}.</p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class FrenchAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.tartarus.snowball.ext.IrishStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Irish.
|
||||
*
|
||||
* @since 3.6.0
|
||||
*/
|
||||
public final class IrishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Galician.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class GalicianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
|
||||
/**
|
||||
* Analyzer for Hindi.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class HindiAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.HungarianStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Hungarian.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class HungarianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.ArmenianStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Armenian.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||
|
||||
/**
|
||||
* Analyzer for Indonesian (Bahasa)
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
|
||||
/** File containing default Indonesian stopwords. */
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Italian.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.LithuanianStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Lithuanian.
|
||||
*
|
||||
* @since 5.3.0
|
||||
*/
|
||||
public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Latvian.
|
||||
*
|
||||
* @since 3.2
|
||||
*/
|
||||
public final class LatvianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
private static final String PRESERVE_ORIGINAL = "preserveOriginal";
|
||||
|
|
|
@ -30,6 +30,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
|
||||
/**
|
||||
* Abstract parent class for analysis factories that create {@link ConditionalTokenFilter} instances
|
||||
*
|
||||
* @since 7.4.0
|
||||
*/
|
||||
public abstract class ConditionalTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.HyphenatedWordsFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class HyphenatedWordsFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private final boolean ignoreCase;
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.LengthFilterFactory" min="0" max="1" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class LengthFilterFactory extends TokenFilterFactory {
|
||||
final int min;
|
||||
|
|
|
@ -24,6 +24,8 @@ import org.apache.lucene.analysis.AnalyzerWrapper;
|
|||
* This Analyzer limits the number of tokens while indexing. It is
|
||||
* a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
|
||||
* @see LimitTokenCountFilter
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
|
||||
private final Analyzer delegate;
|
||||
|
|
|
@ -47,6 +47,8 @@ import java.util.Map;
|
|||
*
|
||||
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
|
||||
* and query parsing.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class PerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper {
|
||||
private final Analyzer defaultAnalyzer;
|
||||
|
|
|
@ -75,6 +75,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
* <p>See related {@link org.apache.lucene.analysis.custom.CustomAnalyzer.Builder#whenTerm(Predicate)}
|
||||
*
|
||||
* @since 7.4.0
|
||||
*/
|
||||
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
* @see TrimFilter
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class TrimFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* If the optional {@code prefix} parameter is used, the specified value will be prepended
|
||||
* to the type, e.g. with prefix="_type_", for a token "example.com" with type "<URL>",
|
||||
* the emitted synonym will have text "_type_<URL>".
|
||||
*
|
||||
* @since 7.3.0
|
||||
*/
|
||||
public class TypeAsSynonymFilterFactory extends TokenFilterFactory {
|
||||
private final String prefix;
|
||||
|
|
|
@ -52,6 +52,8 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
|
|||
* @deprecated Use {@link WordDelimiterGraphFilterFactory} instead: it produces a correct
|
||||
* token graph so that e.g. {@link PhraseQuery} works correctly when it's used in
|
||||
* the search time analyzer.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
@Deprecated
|
||||
public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="2" preserveOriginal="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class EdgeNGramFilterFactory extends TokenFilterFactory {
|
||||
private final int maxGramSize;
|
||||
|
|
|
@ -31,6 +31,8 @@ import java.util.Map;
|
|||
* <tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="1" maxGramSize="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class EdgeNGramTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxGramSize;
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2" preserveOriginal="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class NGramFilterFactory extends TokenFilterFactory {
|
||||
private final int maxGramSize;
|
||||
|
|
|
@ -33,6 +33,8 @@ import java.util.Map;
|
|||
* <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class NGramTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxGramSize;
|
||||
|
|
|
@ -47,6 +47,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
* A default set of stopwords is used unless an alternative list is specified, but the
|
||||
* exclusion list is empty by default.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
// TODO: extend StopwordAnalyzerBase
|
||||
public final class DutchAnalyzer extends Analyzer {
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.NorwegianStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Norwegian.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -66,6 +66,8 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* </analyzer>
|
||||
* </fieldType>
|
||||
* </pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
|
||||
private final char delimiter;
|
||||
|
|
|
@ -36,6 +36,8 @@ import java.util.regex.Pattern;
|
|||
* </fieldType></pre>
|
||||
*
|
||||
* @see PatternReplaceFilter
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class PatternReplaceFilterFactory extends TokenFilterFactory {
|
||||
final Pattern pattern;
|
||||
|
|
|
@ -53,6 +53,8 @@ import org.apache.lucene.util.automaton.RegExp;
|
|||
* @lucene.experimental
|
||||
*
|
||||
* @see SimplePatternSplitTokenizer
|
||||
*
|
||||
* @since 6.5.0
|
||||
*/
|
||||
public class SimplePatternSplitTokenizerFactory extends TokenizerFactory {
|
||||
public static final String PATTERN = "pattern";
|
||||
|
|
|
@ -53,6 +53,8 @@ import org.apache.lucene.util.automaton.RegExp;
|
|||
* @lucene.experimental
|
||||
*
|
||||
* @see SimplePatternTokenizer
|
||||
*
|
||||
* @since 6.5.0
|
||||
*/
|
||||
public class SimplePatternTokenizerFactory extends TokenizerFactory {
|
||||
public static final String PATTERN = "pattern";
|
||||
|
|
|
@ -33,6 +33,8 @@ import java.util.Map;
|
|||
* <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String ENCODER_ATTR = "encoder";
|
||||
|
|
|
@ -30,6 +30,8 @@ import java.util.Map;
|
|||
* <filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
private final float payload;
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.TokenOffsetPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <filter class="solr.TypeAsPayloadTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Portuguese.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -47,6 +47,8 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
* a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for
|
||||
* this term to take 2 seconds.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
|
||||
|
||||
|
|
|
@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.RomanianStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Romanian.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class RomanianAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -40,6 +40,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
* Supports an external list of stopwords (words that
|
||||
* will not be indexed at all).
|
||||
* A default set of stopwords is used unless an alternative list is specified.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class RussianAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
* <li>tokenSeparator - how tokens should be joined together in the shingle (default: space)
|
||||
* <li>fillerToken - what should be added in place of stop words (default: _ )
|
||||
* </ul>
|
||||
*
|
||||
* @since 7.4.0
|
||||
*/
|
||||
public class FixedShingleFilterFactory extends TokenFilterFactory {
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|||
* <p>
|
||||
* A shingle is another name for a token based n-gram.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ import java.util.Map;
|
|||
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="_"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ShingleFilterFactory extends TokenFilterFactory {
|
||||
private final int minShingleSize;
|
||||
|
|
|
@ -41,6 +41,8 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
public static final String PROTECTED_TOKENS = "protected";
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
|||
* ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
|
||||
* As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
|
||||
* as specified by UAX#29.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ClassicAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@ import java.util.Map;
|
|||
* <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ClassicTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
|
|
@ -30,6 +30,8 @@ import java.util.Map;
|
|||
* <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class StandardTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
|||
* with {@link org.apache.lucene.analysis.LowerCaseFilter} and
|
||||
* {@link org.apache.lucene.analysis.StopFilter}, using a list of
|
||||
* English stop words.
|
||||
*
|
||||
* @since 3.6.0
|
||||
*/
|
||||
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@ import java.util.Map;
|
|||
* <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
|
||||
private final int maxTokenLength;
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.SwedishStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Swedish.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class SwedishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -76,6 +76,8 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
|
|||
*
|
||||
* @deprecated Use {@link SynonymGraphFilterFactory} instead, but be sure to also
|
||||
* use {@link FlattenGraphFilterFactory} at index time (not at search time) as well.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
@Deprecated
|
||||
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
|
|
@ -30,6 +30,8 @@ import org.apache.lucene.analysis.core.DecimalDigitFilter;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for the Thai language. It uses {@link java.text.BreakIterator} to break words.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class ThaiAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* <tokenizer class="solr.ThaiTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 4.10.0
|
||||
*/
|
||||
public class ThaiTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.tartarus.snowball.ext.TurkishStemmer;
|
|||
|
||||
/**
|
||||
* {@link Analyzer} for Turkish.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class TurkishAnalyzer extends StopwordAnalyzerBase {
|
||||
private final CharArraySet stemExclusionSet;
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.lucene.analysis.CharFilter;
|
|||
/**
|
||||
* Abstract parent class for analysis factories that create {@link CharFilter}
|
||||
* instances.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
|
||||
|
||||
|
|
|
@ -35,6 +35,8 @@ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
|||
* articles="stopwordarticles.txt" ignoreCase="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware, MultiTermAwareComponent {
|
||||
private final String articlesFile;
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
/**
|
||||
* Abstract parent class for analysis factories that create {@link org.apache.lucene.analysis.TokenFilter}
|
||||
* instances.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ import java.util.Set;
|
|||
/**
|
||||
* Abstract parent class for analysis factories that create {@link Tokenizer}
|
||||
* instances.
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public abstract class TokenizerFactory extends AbstractAnalysisFactory {
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* <tokenizer class="solr.WikipediaTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public class WikipediaTokenizerFactory extends TokenizerFactory {
|
||||
public static final String TOKEN_OUTPUT = "tokenOutput";
|
||||
|
|
|
@ -71,6 +71,8 @@ import java.text.Collator;
|
|||
* CollationKeyAnalyzer to generate index terms, do not use
|
||||
* ICUCollationKeyAnalyzer on the query side, or vice versa.
|
||||
* </p>
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class CollationKeyAnalyzer extends Analyzer {
|
||||
private final CollationAttributeFactory factory;
|
||||
|
|
|
@ -44,6 +44,8 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
* @see ICUNormalizer2CharFilter
|
||||
* @see Normalizer2
|
||||
* @see FilteredNormalizer2
|
||||
*
|
||||
* @since 4.10.0
|
||||
*/
|
||||
public class ICUNormalizer2CharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
|
||||
private final Normalizer2 normalizer;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue