LUCENE-8356: Deprecate StandardFilter

Alan Woodward 2018-06-13 13:42:17 +01:00
parent 7bb9990654
commit fce841534a
43 changed files with 131 additions and 254 deletions

View File

@@ -112,6 +112,10 @@ Optimizations
 ======================= Lucene 7.5.0 =======================
 
+API Changes:
+
+* LUCENE-8356: StandardFilter is deprecated (Alan Woodward)
+
 Bug Fixes:
 
 * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
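Migration note: the same mechanical rewrite applies to every analyzer touched below. StandardFilter performs no work, so it can simply be dropped and the next filter chained directly off the tokenizer. A minimal before/after sketch (the wrapper class and method names are illustrative, not part of this commit):

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

class MigrationSketch {
  // Old chain: the deprecated no-op sits between the tokenizer and the first real filter.
  TokenStream before() {
    Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source); // deprecated no-op
    return new LowerCaseFilter(result);
  }

  // New chain: identical tokens, one less wrapper object per field.
  TokenStream after() {
    Tokenizer source = new StandardTokenizer();
    return new LowerCaseFilter(source);
  }
}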

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -111,15 +110,14 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link BulgarianStemFilter}.
    */
   @Override
   public TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -129,8 +127,6 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -17,16 +17,20 @@
 package org.apache.lucene.analysis.bn;
 
-import org.apache.lucene.analysis.*;
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-
-import java.io.IOException;
-import java.io.Reader;
 
 /**
  * Analyzer for Bengali.
  */
@@ -122,8 +126,7 @@ public final class BengaliAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new IndicNormalizationFilter(result);
     result = new BengaliNormalizationFilter(result);

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -114,14 +113,13 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link BrazilianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     Tokenizer source = new StandardTokenizer();
     TokenStream result = new LowerCaseFilter(source);
-    result = new StandardFilter(result);
     result = new StopFilter(result, stopwords);
     if(excltable != null && !excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);
@@ -130,9 +128,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -25,14 +25,13 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.tartarus.snowball.ext.CatalanStemmer;
 
 /**
@@ -114,15 +113,14 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter},
+   *         {@link ElisionFilter}, {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -133,8 +131,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -108,7 +107,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link SoraniNormalizationFilter},
+   *         {@link SoraniNormalizationFilter},
    *         {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SoraniStemFilter}.
@@ -116,8 +115,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new SoraniNormalizationFilter(result);
+    TokenStream result = new SoraniNormalizationFilter(source);
     result = new LowerCaseFilter(result);
     result = new DecimalDigitFilter(result);
     result = new StopFilter(result, stopwords);
@@ -129,8 +127,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new SoraniNormalizationFilter(result);
+    TokenStream result = new SoraniNormalizationFilter(in);
     result = new LowerCaseFilter(result);
     result = new DecimalDigitFilter(result);
     return result;

View File

@@ -17,7 +17,8 @@
 package org.apache.lucene.analysis.cz;
 
-import java.io.*;
+import java.io.IOException;
+import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -29,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -107,7 +107,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
    *         a stem exclusion set is provided via
    *         {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a
@@ -117,8 +117,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!this.stemExclusionTable.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionTable);
@@ -128,9 +127,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.DanishStemmer;
@@ -109,15 +108,14 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -127,8 +125,6 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -124,15 +123,14 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, {@link GermanNormalizationFilter} and {@link GermanLightStemFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     result = new SetKeywordMarkerFilter(result, exclusionSet);
     result = new GermanNormalizationFilter(result);
@@ -142,8 +140,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new GermanNormalizationFilter(result);
     return result;
   }

View File

@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -92,14 +91,13 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
+   *         {@link GreekLowerCaseFilter},
    *         {@link StopFilter}, and {@link GreekStemFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
     TokenStream result = new GreekLowerCaseFilter(source);
-    result = new StandardFilter(result);
     result = new StopFilter(result, stopwords);
     result = new GreekStemFilter(result);
     return new TokenStreamComponents(source, result);
@@ -107,8 +105,6 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new GreekLowerCaseFilter(result);
-    return result;
+    return new GreekLowerCaseFilter(in);
   }
 }

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -90,7 +89,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link EnglishPossessiveFilter},
+   *         {@link EnglishPossessiveFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link PorterStemFilter}.
@@ -98,8 +97,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new EnglishPossessiveFilter(result);
+    TokenStream result = new EnglishPossessiveFilter(source);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -110,8 +108,6 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -108,15 +107,14 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SpanishLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -126,8 +124,6 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.BasqueStemmer;
@@ -106,15 +105,14 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -124,8 +122,6 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -129,8 +128,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new ArabicNormalizationFilter(result);
     /* additional persian-specific normalization */

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.FinnishStemmer;
@@ -109,15 +108,14 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -127,8 +125,6 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -32,8 +32,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
-import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.util.IOUtils;
@@ -127,7 +126,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter},
+   *         {@link ElisionFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter},
    *         {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, and {@link FrenchLightStemFilter}
@@ -135,8 +134,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!excltable.isEmpty())
@@ -147,8 +145,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.tartarus.snowball.ext.IrishStemmer;
@@ -124,15 +123,14 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter}
+   *         {@link IrishLowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new StopFilter(result, HYPHENATIONS);
+    TokenStream result = new StopFilter(source, HYPHENATIONS);
     result = new ElisionFilter(result, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
@@ -144,8 +142,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     return result;
   }

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -107,15 +106,14 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link GalicianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -125,8 +123,6 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -129,8 +128,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new IndicNormalizationFilter(result);
     result = new HindiNormalizationFilter(result);

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.HungarianStemmer;
@@ -109,15 +108,14 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -127,8 +125,6 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.ArmenianStemmer;
@@ -106,15 +105,14 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -124,8 +122,6 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
  }
 }

View File

@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -104,15 +103,14 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter},
+   *         {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter}
    *         if a stem exclusion set is provided and {@link IndonesianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if (!stemExclusionSet.isEmpty()) {
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -122,8 +120,6 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.util.IOUtils;
@@ -117,15 +116,14 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link ItalianLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -136,8 +134,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.LithuanianStemmer;
@@ -106,15 +105,14 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -124,8 +122,6 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -107,15 +106,14 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link LatvianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -125,8 +123,6 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -30,10 +30,9 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.IOUtils;
@@ -142,15 +141,14 @@ public final class DutchAnalyzer extends Analyzer {
    *   text in the provided {@link Reader}.
    *
    * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-   *   filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+   *   filtered with {@link LowerCaseFilter},
    *   {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is provided,
    *   {@link StemmerOverrideFilter}, and {@link SnowballFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stoptable);
     if (!excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);
@@ -162,8 +160,6 @@ public final class DutchAnalyzer extends Analyzer {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.NorwegianStemmer;
@@ -109,15 +108,14 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -127,9 +125,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -108,15 +107,14 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link PortugueseLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -126,8 +124,6 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.RomanianStemmer;
@@ -111,15 +110,14 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -129,8 +127,6 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
@@ -106,15 +105,14 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, and {@link SnowballFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if (!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -124,8 +122,6 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.standard;
 import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
@@ -32,7 +31,10 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  *     &lt;filter class="solr.StandardFilterFactory"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
+ *
+ * @deprecated StandardFilter is a no-op and can be removed from filter chains
  */
+@Deprecated
 public class StandardFilterFactory extends TokenFilterFactory {
 
   /** Creates a new StandardFilterFactory */
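Note: Solr schemas and CustomAnalyzer definitions can drop the factory the same way. A sketch assuming the SPI names registered by this module ("standard", "lowercase"); the wrapper class is illustrative:

import java.io.IOException;

import org.apache.lucene.analysis.custom.CustomAnalyzer;

class FactoryMigrationSketch {
  CustomAnalyzer build() throws IOException {
    return CustomAnalyzer.builder()
        .withTokenizer("standard")
        // .addTokenFilter("standard") // deprecated no-op: delete this line
        .addTokenFilter("lowercase")
        .build();
  }
}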

View File

@@ -29,8 +29,7 @@ import org.apache.lucene.analysis.core.StopAnalyzer;
 
 /**
  * Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
- * with {@link org.apache.lucene.analysis.standard.StandardFilter},
- * {@link org.apache.lucene.analysis.LowerCaseFilter} and
+ * with {@link org.apache.lucene.analysis.LowerCaseFilter} and
  * {@link org.apache.lucene.analysis.StopFilter}, using a list of
  * English stop words.
  */
@@ -87,8 +86,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(final String fieldName) {
     final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer();
     src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new StandardFilter(src);
-    tok = new LowerCaseFilter(tok);
+    TokenStream tok = new LowerCaseFilter(src);
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override

View File

@@ -29,7 +29,6 @@
         based on the Unicode Text Segmentation algorithm.)
         {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
         {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
-        {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
         {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
         and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
     </li>
@@ -41,14 +40,13 @@
         <br>
         {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
         {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
-        {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
         {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
         and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
     </li>
 </ul>
 <p>
-  This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
-  and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
+  This Java package additionally contains {@code StandardAnalyzer} and {@code StandardTokenizer},
+  which are not visible here, because they moved to Lucene Core.
   The factories for those components (e.g., used in Solr) are still part of this module.
 </body>
 </html>

View File

@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.SwedishStemmer;
@@ -109,15 +108,14 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -127,8 +125,6 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.TurkishStemmer;
@@ -110,15 +109,14 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link TurkishLowerCaseFilter},
+   *         {@link TurkishLowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem
    *         exclusion set is provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ApostropheFilter(result);
+    TokenStream result = new ApostropheFilter(source);
     result = new TurkishLowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if (!stemExclusionSet.isEmpty()) {
@@ -130,8 +128,6 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new TurkishLowerCaseFilter(result);
-    return result;
+    return new TurkishLowerCaseFilter(in);
   }
 }

View File

@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -167,10 +166,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
     }
     //make sure we produce the same tokens
-    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
+    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(standardTokenizer(buffer));
     TokenStream sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), 100);
     teeStream.consumeAllTokens();
-    TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100);
+    TokenStream stream = new ModuloTokenFilter(standardTokenizer(buffer), 100);
     CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
     CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
     for (int i=0; stream.incrementToken(); i++) {
@@ -183,12 +182,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       int tfPos = 0;
       long start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        stream = new StandardFilter(standardTokenizer(buffer));
+        stream = standardTokenizer(buffer);
         PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
           tfPos += posIncrAtt.getPositionIncrement();
         }
-        stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), modCounts[j]);
+        stream = new ModuloTokenFilter(standardTokenizer(buffer), modCounts[j]);
         posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
           tfPos += posIncrAtt.getPositionIncrement();
@@ -200,7 +199,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       //simulate one field with one sink
       start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
+        teeStream = new TeeSinkTokenFilter(standardTokenizer(buffer));
         sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), modCounts[j]);
         PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
         while (teeStream.incrementToken()) {

View File

@@ -21,11 +21,8 @@ import java.io.Reader;
 import morfologik.stemming.Dictionary;
 import morfologik.stemming.polish.PolishStemmer;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -60,7 +57,7 @@ public class MorfologikAnalyzer extends Analyzer {
    * @param field ignored field name
    * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter} and {@link MorfologikFilter}.
+   *         {@link MorfologikFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(final String field) {
@@ -68,11 +65,7 @@ public class MorfologikAnalyzer extends Analyzer {
     return new TokenStreamComponents(
         src,
-        new MorfologikFilter(new StandardFilter(src), dictionary));
+        new MorfologikFilter(src, dictionary));
   }
-
-  @Override
-  protected TokenStream normalize(String fieldName, TokenStream in) {
-    return new StandardFilter(in);
-  }
 }
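Note: this file deletes its normalize override outright instead of rewriting it; once the no-op wrapper is gone nothing remains, so the default inherited from Analyzer applies. A sketch of that inherited behavior (illustrative, not code from this commit):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

abstract class DefaultNormalizeSketch extends Analyzer {
  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    return in; // identity, matching Analyzer's default
  }
}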

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 
+import morfologik.stemming.Dictionary;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
@@ -33,12 +34,9 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter;
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.morfologik.MorfologikFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 
-import morfologik.stemming.Dictionary;
-
 /**
  * A dictionary-based {@link Analyzer} for Ukrainian.
  */
@@ -132,15 +130,14 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link MorfologikFilter} on the Ukrainian dictionary.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if (stemExclusionSet.isEmpty() == false) {

View File

@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -178,8 +177,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
         keywords.add("liście");
 
         final Tokenizer src = new StandardTokenizer();
-        TokenStream result = new StandardFilter(src);
-        result = new SetKeywordMarkerFilter(result, keywords);
+        TokenStream result = new SetKeywordMarkerFilter(src, keywords);
         result = new MorfologikFilter(result);
 
         return new TokenStreamComponents(src, result);

View File

@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.stempel.StempelFilter;
 import org.apache.lucene.analysis.stempel.StempelStemmer;
@@ -131,15 +130,14 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link StempelFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
@@ -149,8 +147,6 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -30,9 +30,8 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WordlistLoader;
 
 /**
- * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a configurable list of
- * stop words.
+ * Filters {@link StandardTokenizer} with {@link LowerCaseFilter} and
+ * {@link StopFilter}, using a configurable list of stop words.
  */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {
@@ -102,8 +101,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(final String fieldName) {
     final StandardTokenizer src = new StandardTokenizer();
     src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new StandardFilter(src);
-    tok = new LowerCaseFilter(tok);
+    TokenStream tok = new LowerCaseFilter(src);
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override
@@ -118,8 +116,6 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }

View File

@@ -24,7 +24,10 @@ import org.apache.lucene.analysis.TokenStream;
 
 /**
  * Normalizes tokens extracted with {@link StandardTokenizer}.
+ *
+ * @deprecated StandardFilter is a no-op and can be removed from code
  */
+@Deprecated
 public class StandardFilter extends TokenFilter {
 
   /** Sole constructor */
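Note: a quick way to confirm the filter is a no-op is to run the same text through a bare StandardTokenizer and through a StandardFilter-wrapped one and compare the terms. A minimal, illustrative check (input text and class name are made up):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

class NoOpCheck {
  static void printTokens(TokenStream ts) throws IOException {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }

  public static void main(String[] args) throws IOException {
    Tokenizer plain = new StandardTokenizer();
    plain.setReader(new StringReader("Deprecate StandardFilter"));
    printTokens(plain);

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("Deprecate StandardFilter"));
    printTokens(new StandardFilter(source)); // prints the same terms
  }
}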

View File

@@ -25,7 +25,6 @@
  * <br>
  * {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
  * {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
- * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
  * {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
  * and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
  */