SOLR-12956: Add Javadoc @since tag to Analyzer component classes

This commit is contained in:
Alexandre Rafalovitch 2018-11-06 07:36:14 -05:00
parent 2d95b740db
commit c07df19666
127 changed files with 254 additions and 5 deletions

View File

@ -47,6 +47,7 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* <li>Arabic stop words file: a set of default Arabic stop words.
* </ul>
*
* @since 3.1
*/
public final class ArabicAnalyzer extends StopwordAnalyzerBase {

View File

@ -34,6 +34,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ArabicNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ArabicStemFilterFactory extends TokenFilterFactory {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* This analyzer implements light-stemming as specified by: <i> Searching
* Strategies for the Bulgarian Language </i>
* http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
*
* @since 3.1
*/
public final class BulgarianAnalyzer extends StopwordAnalyzerBase {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Analyzer for Bengali.
*
* @since 7.1.0
*/
public final class BengaliAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -44,6 +44,8 @@ import org.apache.lucene.util.IOUtils;
*
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*
* @since 3.1
*/
public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
/** File containing default Brazilian Portuguese stopwords. */

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class BrazilianStemFilterFactory extends TokenFilterFactory {

View File

@ -36,6 +36,8 @@ import org.tartarus.snowball.ext.CatalanStemmer;
/**
* {@link Analyzer} for Catalan.
*
* @since 3.1
*/
public final class CatalanAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -33,6 +33,8 @@ import java.util.regex.Pattern;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class HTMLStripCharFilterFactory extends CharFilterFactory {
final Set<String> escapedTags;

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* normalizes content with {@link CJKWidthFilter}, folds case with
* {@link LowerCaseFilter}, forms bigrams of CJK with {@link CJKBigramFilter},
* and filters stopwords with {@link StopFilter}
*
* @since 3.1
*/
public final class CJKAnalyzer extends StopwordAnalyzerBase {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Sorani Kurdish.
*
* @since 4.10.0
*/
public final class SoraniAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -37,6 +37,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class CommonGramsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
// TODO: shared base class for Stop/Keep/CommonGrams?

View File

@ -34,6 +34,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
* &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class CommonGramsQueryFilterFactory extends CommonGramsFilterFactory {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;

View File

@ -22,6 +22,8 @@ import org.apache.lucene.analysis.Analyzer;
/**
* "Tokenizes" the entire stream as a single token. This is useful
* for data like zip codes, ids, and some product names.
*
* @since 3.1
*/
public final class KeywordAnalyzer extends Analyzer {
public KeywordAnalyzer() {

View File

@ -38,6 +38,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
* MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rare to need to change this
* else {@link KeywordTokenizer}::DEFAULT_BUFFER_SIZE</li>
* </ul>
*
* @since 3.1
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;

View File

@ -40,6 +40,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
* It is rare to need to change this
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
* </ul>
*
* @since 3.1
*/
public class LetterTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class LowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

View File

@ -24,6 +24,8 @@ import org.apache.lucene.analysis.Tokenizer;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
* with {@link LowerCaseFilter}
*
* @since 3.1
**/
public final class SimpleAnalyzer extends Analyzer {

View File

@ -31,6 +31,8 @@ import org.apache.lucene.analysis.WordlistLoader;
/**
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
*
* @since 3.1
*/
public final class StopAnalyzer extends StopwordAnalyzerBase {

View File

@ -69,6 +69,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* for details.
* </li>
* </ul>
*
* @since 3.1
*/
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String FORMAT_WORDSET = "wordset";

View File

@ -21,6 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
/**
* An Analyzer that uses {@link UnicodeWhitespaceTokenizer}.
*
* @since 5.4.0
**/
public final class UnicodeWhitespaceAnalyzer extends Analyzer {

View File

@ -21,6 +21,8 @@ import org.apache.lucene.analysis.Analyzer;
/**
* An Analyzer that uses {@link WhitespaceTokenizer}.
*
* @since 3.1
**/
public final class WhitespaceAnalyzer extends Analyzer {

View File

@ -45,6 +45,8 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
* It is rare to need to change this
* else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN</li>
* </ul>
*
* @since 3.1
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
public static final String RULE_JAVA = "java";

View File

@ -89,6 +89,8 @@ import static org.apache.lucene.analysis.util.AnalysisSPILoader.newFactoryClassI
* .endwhen()
* .build();
* </pre>
*
* @since 5.0.0
*/
public final class CustomAnalyzer extends Analyzer {

View File

@ -40,6 +40,8 @@ import org.apache.lucene.util.IOUtils;
* all). A default set of stopwords is used unless an alternative list is
* specified.
* </p>
*
* @since 3.1
*/
public final class CzechAnalyzer extends StopwordAnalyzerBase {
/** File containing default Czech stopwords. */

View File

@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.DanishStemmer;
/**
* {@link Analyzer} for Danish.
*
* @since 3.1
*/
public final class DanishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -48,6 +48,8 @@ import org.apache.lucene.util.IOUtils;
*
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*
* @since 3.1
*/
public final class GermanAnalyzer extends StopwordAnalyzerBase {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.GermanStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class GermanStemFilterFactory extends TokenFilterFactory {

View File

@ -38,6 +38,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
*
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*
* @since 3.1
*/
public final class GreekAnalyzer extends StopwordAnalyzerBase {
/** File containing default Greek stopwords. */

View File

@ -34,6 +34,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class GreekLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* {@link Analyzer} for English.
*
* @since 3.1
*/
public final class EnglishAnalyzer extends StopwordAnalyzerBase {

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PorterStemFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class PorterStemFilterFactory extends TokenFilterFactory {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Spanish.
*
* @since 3.1
*/
public final class SpanishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.BasqueStemmer;
/**
* {@link Analyzer} for Basque.
*
* @since 3.1
*/
public final class BasqueAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -38,6 +38,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* zero-width non-joiner in addition to whitespace. Some Persian-specific variant forms (such as Farsi
* yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
* </p>
*
* @since 3.1
*/
public final class PersianAnalyzer extends StopwordAnalyzerBase {

View File

@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.MultiTermAwareComponent;
* &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class PersianCharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {

View File

@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.PersianNormalizationFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class PersianNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

View File

@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.FinnishStemmer;
/**
* {@link Analyzer} for Finnish.
*
* @since 3.1
*/
public final class FinnishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -49,6 +49,8 @@ import org.apache.lucene.util.IOUtils;
*
* <p><b>NOTE</b>: This class uses the same {@link org.apache.lucene.util.Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*
* @since 3.1
*/
public final class FrenchAnalyzer extends StopwordAnalyzerBase {

View File

@ -35,6 +35,8 @@ import org.tartarus.snowball.ext.IrishStemmer;
/**
* {@link Analyzer} for Irish.
*
* @since 3.6.0
*/
public final class IrishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -35,6 +35,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Galician.
*
* @since 3.1
*/
public final class GalicianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Analyzer for Hindi.
*
* @since 3.1
*/
public final class HindiAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.HungarianStemmer;
/**
* {@link Analyzer} for Hungarian.
*
* @since 3.1
*/
public final class HungarianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.ArmenianStemmer;
/**
* {@link Analyzer} for Armenian.
*
* @since 3.1
*/
public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -31,6 +31,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
* Analyzer for Indonesian (Bahasa).
*
* @since 3.1
*/
public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
/** File containing default Indonesian stopwords. */

View File

@ -38,6 +38,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Italian.
*
* @since 3.1
*/
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.LithuanianStemmer;
/**
* {@link Analyzer} for Lithuanian.
*
* @since 5.3.0
*/
public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -35,6 +35,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Latvian.
*
* @since 3.2
*/
public final class LatvianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -35,6 +35,8 @@ import org.apache.lucene.analysis.TokenStream;
* &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ASCIIFoldingFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
private static final String PRESERVE_ORIGINAL = "preserveOriginal";

View File

@ -30,6 +30,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Abstract parent class for analysis factories that create {@link ConditionalTokenFilter} instances
*
* @since 7.4.0
*/
public abstract class ConditionalTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class HyphenatedWordsFilterFactory extends TokenFilterFactory {

View File

@ -35,6 +35,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class KeepWordFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private final boolean ignoreCase;

View File

@ -31,6 +31,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.LengthFilterFactory" min="0" max="1" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class LengthFilterFactory extends TokenFilterFactory {
final int min;

View File

@ -24,6 +24,8 @@ import org.apache.lucene.analysis.AnalyzerWrapper;
* This Analyzer limits the number of tokens while indexing. It is
* a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
* @see LimitTokenCountFilter
*
* @since 3.1
*/
public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
private final Analyzer delegate;

View File

@ -47,6 +47,8 @@ import java.util.Map;
*
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*
* @since 3.1
*/
public final class PerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper {
private final Analyzer defaultAnalyzer;

View File

@ -75,6 +75,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*
* <p>See related {@link org.apache.lucene.analysis.custom.CustomAnalyzer.Builder#whenTerm(Predicate)}
*
* @since 7.4.0
*/
public class ProtectedTermFilterFactory extends ConditionalTokenFilterFactory implements ResourceLoaderAware {

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class RemoveDuplicatesTokenFilterFactory extends TokenFilterFactory {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;/fieldType&gt;</pre>
*
* @see TrimFilter
*
* @since 3.1
*/
public class TrimFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* If the optional {@code prefix} parameter is used, the specified value will be prepended
* to the type, e.g. with prefix="_type_", for a token "example.com" with type "&lt;URL&gt;",
* the emitted synonym will have text "_type_&lt;URL&gt;".
*
* @since 7.3.0
*/
public class TypeAsSynonymFilterFactory extends TokenFilterFactory {
private final String prefix;

View File

@ -52,6 +52,8 @@ import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
* @deprecated Use {@link WordDelimiterGraphFilterFactory} instead: it produces a correct
* token graph so that e.g. {@link PhraseQuery} works correctly when it's used in
* the search time analyzer.
*
* @since 3.1
*/
@Deprecated
public class WordDelimiterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="2" preserveOriginal="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class EdgeNGramFilterFactory extends TokenFilterFactory {
private final int maxGramSize;

View File

@ -31,6 +31,8 @@ import java.util.Map;
* &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="1" maxGramSize="1"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class EdgeNGramTokenizerFactory extends TokenizerFactory {
private final int maxGramSize;

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2" preserveOriginal="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class NGramFilterFactory extends TokenFilterFactory {
private final int maxGramSize;

View File

@ -33,6 +33,8 @@ import java.util.Map;
* &lt;tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class NGramTokenizerFactory extends TokenizerFactory {
private final int maxGramSize;

View File

@ -47,6 +47,8 @@ import org.apache.lucene.util.IOUtils;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
* </p>
*
* @since 3.1
*/
// TODO: extend StopwordAnalyzerBase
public final class DutchAnalyzer extends Analyzer {

View File

@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.NorwegianStemmer;
/**
* {@link Analyzer} for Norwegian.
*
* @since 3.1
*/
public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -66,6 +66,8 @@ import org.apache.lucene.util.AttributeFactory;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;
* </pre>
*
* @since 3.1
*/
public class PathHierarchyTokenizerFactory extends TokenizerFactory {
private final char delimiter;

View File

@ -36,6 +36,8 @@ import java.util.regex.Pattern;
* &lt;/fieldType&gt;</pre>
*
* @see PatternReplaceFilter
*
* @since 3.1
*/
public class PatternReplaceFilterFactory extends TokenFilterFactory {
final Pattern pattern;

View File

@ -53,6 +53,8 @@ import org.apache.lucene.util.automaton.RegExp;
* @lucene.experimental
*
* @see SimplePatternSplitTokenizer
*
* @since 6.5.0
*/
public class SimplePatternSplitTokenizerFactory extends TokenizerFactory {
public static final String PATTERN = "pattern";

View File

@ -53,6 +53,8 @@ import org.apache.lucene.util.automaton.RegExp;
* @lucene.experimental
*
* @see SimplePatternTokenizer
*
* @since 6.5.0
*/
public class SimplePatternTokenizerFactory extends TokenizerFactory {
public static final String PATTERN = "pattern";

View File

@ -33,6 +33,8 @@ import java.util.Map;
* &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class DelimitedPayloadTokenFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String ENCODER_ATTR = "encoder";

View File

@ -30,6 +30,8 @@ import java.util.Map;
* &lt;filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class NumericPayloadTokenFilterFactory extends TokenFilterFactory {
private final float payload;

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class TokenOffsetPayloadTokenFilterFactory extends TokenFilterFactory {

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class TypeAsPayloadTokenFilterFactory extends TokenFilterFactory {

View File

@ -36,6 +36,8 @@ import org.apache.lucene.util.IOUtils;
/**
* {@link Analyzer} for Portuguese.
*
* @since 3.1
*/
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -47,6 +47,8 @@ import org.apache.lucene.util.CharsRefBuilder;
* a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for
* this term to take 2 seconds.
* </p>
*
* @since 3.1
*/
public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {

View File

@ -34,6 +34,8 @@ import org.tartarus.snowball.ext.RomanianStemmer;
/**
* {@link Analyzer} for Romanian.
*
* @since 3.1
*/
public final class RomanianAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -40,6 +40,8 @@ import org.apache.lucene.util.IOUtils;
* Supports an external list of stopwords (words that
* will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
*
* @since 3.1
*/
public final class RussianAnalyzer extends StopwordAnalyzerBase {

View File

@ -31,6 +31,8 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <li>tokenSeparator - how tokens should be joined together in the shingle (default: space)
* <li>fillerToken - what should be added in place of stop words (default: _ )
* </ul>
*
* @since 7.4.0
*/
public class FixedShingleFilterFactory extends TokenFilterFactory {

View File

@ -26,6 +26,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
* <p>
* A shingle is another name for a token based n-gram.
* </p>
*
* @since 3.1
*/
public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {

View File

@ -32,6 +32,8 @@ import java.util.Map;
* outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="_"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ShingleFilterFactory extends TokenFilterFactory {
private final int minShingleSize;

View File

@ -41,6 +41,8 @@ import org.tartarus.snowball.SnowballProgram;
* &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class SnowballPorterFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
public static final String PROTECTED_TOKENS = "protected";

View File

@ -36,6 +36,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer;
* ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
* As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
* as specified by UAX#29.
*
* @since 3.1
*/
public final class ClassicAnalyzer extends StopwordAnalyzerBase {

View File

@ -30,6 +30,8 @@ import java.util.Map;
* &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ClassicTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;

View File

@ -30,6 +30,8 @@ import java.util.Map;
* &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class StandardTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;

View File

@ -32,6 +32,8 @@ import org.apache.lucene.analysis.en.EnglishAnalyzer;
* with {@link org.apache.lucene.analysis.LowerCaseFilter} and
* {@link org.apache.lucene.analysis.StopFilter}, using a list of
* English stop words.
*
* @since 3.6.0
*/
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {

View File

@ -30,6 +30,8 @@ import java.util.Map;
* &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class UAX29URLEmailTokenizerFactory extends TokenizerFactory {
private final int maxTokenLength;

View File

@ -37,6 +37,8 @@ import org.tartarus.snowball.ext.SwedishStemmer;
/**
* {@link Analyzer} for Swedish.
*
* @since 3.1
*/
public final class SwedishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -76,6 +76,8 @@ import org.apache.lucene.analysis.util.TokenizerFactory;
*
* @deprecated Use {@link SynonymGraphFilterFactory} instead, but be sure to also
* use {@link FlattenGraphFilterFactory} at index time (not at search time) as well.
*
* @since 3.1
*/
@Deprecated
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {

View File

@ -30,6 +30,8 @@ import org.apache.lucene.analysis.core.DecimalDigitFilter;
/**
* {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
*
* @since 3.1
*/
public final class ThaiAnalyzer extends StopwordAnalyzerBase {

View File

@ -31,6 +31,8 @@ import org.apache.lucene.util.AttributeFactory;
* &lt;tokenizer class="solr.ThaiTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 4.10.0
*/
public class ThaiTokenizerFactory extends TokenizerFactory {

View File

@ -33,6 +33,8 @@ import org.tartarus.snowball.ext.TurkishStemmer;
/**
* {@link Analyzer} for Turkish.
*
* @since 3.1
*/
public final class TurkishAnalyzer extends StopwordAnalyzerBase {
private final CharArraySet stemExclusionSet;

View File

@ -26,6 +26,8 @@ import org.apache.lucene.analysis.CharFilter;
/**
* Abstract parent class for analysis factories that create {@link CharFilter}
* instances.
*
* @since 3.1
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {

View File

@ -35,6 +35,8 @@ import org.apache.lucene.analysis.fr.FrenchAnalyzer;
* articles="stopwordarticles.txt" ignoreCase="true"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware, MultiTermAwareComponent {
private final String articlesFile;

View File

@ -25,6 +25,8 @@ import org.apache.lucene.analysis.TokenStream;
/**
* Abstract parent class for analysis factories that create {@link org.apache.lucene.analysis.TokenFilter}
* instances.
*
* @since 3.1
*/
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {

View File

@ -27,6 +27,8 @@ import java.util.Set;
/**
* Abstract parent class for analysis factories that create {@link Tokenizer}
* instances.
*
* @since 3.1
*/
public abstract class TokenizerFactory extends AbstractAnalysisFactory {

View File

@ -31,6 +31,8 @@ import org.apache.lucene.util.AttributeFactory;
* &lt;tokenizer class="solr.WikipediaTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* @since 3.1
*/
public class WikipediaTokenizerFactory extends TokenizerFactory {
public static final String TOKEN_OUTPUT = "tokenOutput";

View File

@ -71,6 +71,8 @@ import java.text.Collator;
* CollationKeyAnalyzer to generate index terms, do not use
* ICUCollationKeyAnalyzer on the query side, or vice versa.
* </p>
*
* @since 3.1
*/
public final class CollationKeyAnalyzer extends Analyzer {
private final CollationAttributeFactory factory;

View File

@ -44,6 +44,8 @@ import com.ibm.icu.text.UnicodeSet;
* @see ICUNormalizer2CharFilter
* @see Normalizer2
* @see FilteredNormalizer2
*
* @since 4.10.0
*/
public class ICUNormalizer2CharFilterFactory extends CharFilterFactory implements MultiTermAwareComponent {
private final Normalizer2 normalizer;

Some files were not shown because too many files have changed in this diff Show More