mirror of https://github.com/apache/lucene.git
LUCENE-8356: Deprecate StandardFilter
parent 7bb9990654
commit fce841534a
@@ -112,6 +112,10 @@ Optimizations
 ======================= Lucene 7.5.0 =======================
 
+API Changes:
+
+* LUCENE-8356: StandardFilter is deprecated (Alan Woodward)
+
 Bug Fixes:
 
 * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
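Context for the diffs that follow (not part of the commit itself): StandardFilter has been a pass-through since its old token-munging logic moved to ClassicFilter, so every analyzer below applies the same mechanical migration: drop the StandardFilter link from the javadoc and attach the first real filter directly to the tokenizer. A minimal sketch of the pattern, using a hypothetical MyAnalyzer:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

// Hypothetical analyzer, shown only to illustrate the migration pattern.
public final class MyAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    // Before this commit the chain began:
    //   TokenStream result = new StandardFilter(source);
    //   result = new LowerCaseFilter(result);
    // StandardFilter forwarded tokens unchanged, so the first real
    // filter can wrap the tokenizer directly:
    TokenStream result = new LowerCaseFilter(source);
    return new TokenStreamComponents(source, result);
  }

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    // The same change applies to the normalization chain.
    return new LowerCaseFilter(in);
  }
}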
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -111,15 +110,14 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link BulgarianStemFilter}.
    */
   @Override
   public TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -129,8 +127,6 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
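Aside (not in the diff): the protected normalize(String, TokenStream) hook above backs Analyzer#normalize(String, String), which query parsers use for terms that bypass full analysis, such as wildcard or fuzzy terms. A hedged usage sketch; the field name and input text are made up:

import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.util.BytesRef;

public class NormalizeDemo {
  public static void main(String[] args) throws Exception {
    try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer()) {
      // Runs only the normalization chain (after 7.5: just LowerCaseFilter),
      // not the full tokenizer/stemmer pipeline.
      BytesRef term = analyzer.normalize("body", "Книга*");
      System.out.println(term.utf8ToString());
    }
  }
}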
@@ -17,16 +17,20 @@
 package org.apache.lucene.analysis.bn;
 
-import org.apache.lucene.analysis.*;
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-
-import java.io.IOException;
-import java.io.Reader;
 
 /**
  * Analyzer for Bengali.
  */

@@ -122,8 +126,7 @@ public final class BengaliAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new IndicNormalizationFilter(result);
     result = new BengaliNormalizationFilter(result);
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -114,14 +113,13 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link BrazilianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     Tokenizer source = new StandardTokenizer();
     TokenStream result = new LowerCaseFilter(source);
-    result = new StandardFilter(result);
     result = new StopFilter(result, stopwords);
     if(excltable != null && !excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);

@@ -130,9 +128,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -25,14 +25,13 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.tartarus.snowball.ext.CatalanStemmer;
 
 /**

@@ -114,15 +113,14 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter},
+   *         {@link ElisionFilter}, {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())

@@ -133,8 +131,7 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -108,7 +107,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link SoraniNormalizationFilter},
+   *         {@link SoraniNormalizationFilter},
    *         {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SoraniStemFilter}.

@@ -116,8 +115,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new SoraniNormalizationFilter(result);
+    TokenStream result = new SoraniNormalizationFilter(source);
     result = new LowerCaseFilter(result);
     result = new DecimalDigitFilter(result);
     result = new StopFilter(result, stopwords);

@@ -129,8 +127,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new SoraniNormalizationFilter(result);
+    TokenStream result = new SoraniNormalizationFilter(in);
     result = new LowerCaseFilter(result);
     result = new DecimalDigitFilter(result);
     return result;
@@ -17,7 +17,8 @@
 package org.apache.lucene.analysis.cz;
 
-import java.io.*;
+import java.io.IOException;
+import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.Analyzer;

@@ -29,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -107,7 +107,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
    *         a stem exclusion set is provided via
    *         {@link #CzechAnalyzer(CharArraySet, CharArraySet)} a

@@ -117,8 +117,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!this.stemExclusionTable.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionTable);

@@ -128,9 +127,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.DanishStemmer;

@@ -109,15 +108,14 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -127,8 +125,6 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -124,15 +123,14 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, {@link GermanNormalizationFilter} and {@link GermanLightStemFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     result = new SetKeywordMarkerFilter(result, exclusionSet);
     result = new GermanNormalizationFilter(result);

@@ -142,8 +140,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new GermanNormalizationFilter(result);
     return result;
   }
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -92,14 +91,13 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
+   *         {@link GreekLowerCaseFilter},
    *         {@link StopFilter}, and {@link GreekStemFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
     TokenStream result = new GreekLowerCaseFilter(source);
-    result = new StandardFilter(result);
     result = new StopFilter(result, stopwords);
     result = new GreekStemFilter(result);
     return new TokenStreamComponents(source, result);

@@ -107,8 +105,6 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new GreekLowerCaseFilter(result);
-    return result;
+    return new GreekLowerCaseFilter(in);
   }
 }
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -90,7 +89,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link EnglishPossessiveFilter},
+   *         {@link EnglishPossessiveFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link PorterStemFilter}.

@@ -98,8 +97,7 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new EnglishPossessiveFilter(result);
+    TokenStream result = new EnglishPossessiveFilter(source);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())

@@ -110,8 +108,6 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -108,15 +107,14 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SpanishLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -126,8 +124,6 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.BasqueStemmer;

@@ -106,15 +105,14 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -124,8 +122,6 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -129,8 +128,7 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new ArabicNormalizationFilter(result);
     /* additional persian-specific normalization */
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.FinnishStemmer;

@@ -109,15 +108,14 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -127,8 +125,6 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -32,8 +32,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
-import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.util.IOUtils;

@@ -127,7 +126,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter},
+   *         {@link ElisionFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter},
    *         {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, and {@link FrenchLightStemFilter}

@@ -135,8 +134,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!excltable.isEmpty())

@@ -147,8 +145,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.tartarus.snowball.ext.IrishStemmer;

@@ -124,15 +123,14 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter}
+   *         {@link IrishLowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new StopFilter(result, HYPHENATIONS);
+    TokenStream result = new StopFilter(source, HYPHENATIONS);
     result = new ElisionFilter(result, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     result = new StopFilter(result, stopwords);

@@ -144,8 +142,7 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new IrishLowerCaseFilter(result);
     return result;
   }
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -107,15 +106,14 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link GalicianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -125,8 +123,6 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -129,8 +128,7 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(in);
     result = new DecimalDigitFilter(result);
     result = new IndicNormalizationFilter(result);
     result = new HindiNormalizationFilter(result);
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.HungarianStemmer;

@@ -109,15 +108,14 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -127,8 +125,6 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.ArmenianStemmer;

@@ -106,15 +105,14 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -124,8 +122,6 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**

@@ -104,15 +103,14 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter},
+   *         {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter}
    *         if a stem exclusion set is provided and {@link IndonesianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if (!stemExclusionSet.isEmpty()) {
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -122,8 +120,6 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.ElisionFilter;
 import org.apache.lucene.util.IOUtils;

@@ -117,15 +116,14 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link ItalianLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(source, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())

@@ -136,8 +134,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    TokenStream result = new ElisionFilter(in, DEFAULT_ARTICLES);
     result = new LowerCaseFilter(result);
     return result;
   }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.LithuanianStemmer;

@@ -106,15 +105,14 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -124,8 +122,6 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -107,15 +106,14 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link LatvianStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -125,8 +123,6 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -30,10 +30,9 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.IOUtils;

@@ -142,15 +141,14 @@ public final class DutchAnalyzer extends Analyzer {
    * text in the provided {@link Reader}.
    *
    * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+   *         filtered with {@link LowerCaseFilter},
    *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is provided,
    *         {@link StemmerOverrideFilter}, and {@link SnowballFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stoptable);
     if (!excltable.isEmpty())
       result = new SetKeywordMarkerFilter(result, excltable);

@@ -162,8 +160,6 @@ public final class DutchAnalyzer extends Analyzer {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.NorwegianStemmer;

@@ -109,15 +108,14 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -127,9 +125,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -108,15 +107,14 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link PortugueseLightStemFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -126,8 +124,6 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.tartarus.snowball.ext.RomanianStemmer;

@@ -111,15 +110,14 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
    * @return A
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link SnowballFilter}.
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if(!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -129,8 +127,6 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;

@@ -106,15 +105,14 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
    *
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided, and {@link SnowballFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     final Tokenizer source = new StandardTokenizer();
-    TokenStream result = new StandardFilter(source);
-    result = new LowerCaseFilter(result);
+    TokenStream result = new LowerCaseFilter(source);
     result = new StopFilter(result, stopwords);
     if (!stemExclusionSet.isEmpty())
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@@ -124,8 +122,6 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
 
   @Override
   protected TokenStream normalize(String fieldName, TokenStream in) {
-    TokenStream result = new StandardFilter(in);
-    result = new LowerCaseFilter(result);
-    return result;
+    return new LowerCaseFilter(in);
   }
 }
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.standard;
 import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**

@@ -32,7 +31,10 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
  *   <filter class="solr.StandardFilterFactory"/>
  * </analyzer>
  * </fieldType></pre>
+ *
+ * @deprecated StandardFilter is a no-op and can be removed from filter chains
  */
+@Deprecated
 public class StandardFilterFactory extends TokenFilterFactory {
 
   /** Creates a new StandardFilterFactory */
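For reference (a sketch reconstructed from the 7.x sources, not part of this diff): the deprecated StandardFilter did nothing but forward tokens, which is why dropping it from a chain, or removing solr.StandardFilterFactory from a schema, is behavior-preserving:

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Approximate shape of the deprecated filter: a pure pass-through.
public class StandardFilter extends TokenFilter {
  public StandardFilter(TokenStream in) {
    super(in);
  }

  @Override
  public final boolean incrementToken() throws IOException {
    return input.incrementToken();  // forwards every token, modifies nothing
  }
}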
@@ -29,8 +29,7 @@ import org.apache.lucene.analysis.core.StopAnalyzer;
 
 /**
  * Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
- * with {@link org.apache.lucene.analysis.standard.StandardFilter},
- * {@link org.apache.lucene.analysis.LowerCaseFilter} and
+ * with {@link org.apache.lucene.analysis.LowerCaseFilter} and
 * {@link org.apache.lucene.analysis.StopFilter}, using a list of
 * English stop words.
 */

@@ -87,8 +86,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(final String fieldName) {
     final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer();
     src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new StandardFilter(src);
-    tok = new LowerCaseFilter(tok);
+    TokenStream tok = new LowerCaseFilter(src);
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override
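The hunk above is cut off at the anonymous TokenStreamComponents subclass. In the released sources that subclass overrides setReader so a changed max token length takes effect on reuse; a sketch from memory, not part of this diff:

    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) {
        // Re-apply the analyzer's current maxTokenLength before each reuse.
        src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
        super.setReader(reader);
      }
    };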
@@ -29,7 +29,6 @@
     based on the Unicode Text Segmentation algorithm.)
     {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
     {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
-    {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
     {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
     and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
   </li>

@@ -41,14 +40,13 @@
     <br>
     {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
     {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
-    {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
     {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
     and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
   </li>
 </ul>
 <p>
-  This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
-  and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
+  This Java package additionally contains {@code StandardAnalyzer} and {@code StandardTokenizer},
+  which are not visible here, because they moved to Lucene Core.
   The factories for those components (e.g., used in Solr) are still part of this module.
 </body>
 </html>
@ -31,7 +31,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.SwedishStemmer;

@ -109,15 +108,14 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   * @return A
   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    TokenStream result = new LowerCaseFilter(source);
    result = new StopFilter(result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@ -127,8 +125,6 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new StandardFilter(in);
    result = new LowerCaseFilter(result);
    return result;
    return new LowerCaseFilter(in);
  }
}

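The same mechanical rewrite applies to any third-party Analyzer subclass that still wraps its tokenizer: delete the StandardFilter wrap and hand the tokenizer straight to the first real filter. A hedged before/after sketch (class name hypothetical):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class MyMigratedAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    // Before: TokenStream result = new StandardFilter(source);
    //         result = new LowerCaseFilter(result);
    TokenStream result = new LowerCaseFilter(source); // after: tokenizer feeds the filter directly
    return new TokenStreamComponents(source, result);
  }
}
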
@ -28,7 +28,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.tartarus.snowball.ext.TurkishStemmer;

@ -110,15 +109,14 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   * @return A
   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link TurkishLowerCaseFilter},
   *         {@link TurkishLowerCaseFilter},
   *         {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ApostropheFilter(result);
    TokenStream result = new ApostropheFilter(source);
    result = new TurkishLowerCaseFilter(result);
    result = new StopFilter(result, stopwords);
    if (!stemExclusionSet.isEmpty()) {

@ -130,8 +128,6 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new StandardFilter(in);
    result = new TurkishLowerCaseFilter(result);
    return result;
    return new TurkishLowerCaseFilter(in);
  }
}

@ -29,7 +29,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

@ -167,10 +166,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
      buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
    }
    //make sure we produce the same tokens
    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(standardTokenizer(buffer)));
    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(standardTokenizer(buffer));
    TokenStream sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), 100);
    teeStream.consumeAllTokens();
    TokenStream stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), 100);
    TokenStream stream = new ModuloTokenFilter(standardTokenizer(buffer), 100);
    CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
    CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
    for (int i=0; stream.incrementToken(); i++) {

@ -183,12 +182,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
      int tfPos = 0;
      long start = System.currentTimeMillis();
      for (int i = 0; i < 20; i++) {
        stream = new StandardFilter(standardTokenizer(buffer));
        stream = standardTokenizer(buffer);
        PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
        while (stream.incrementToken()) {
          tfPos += posIncrAtt.getPositionIncrement();
        }
        stream = new ModuloTokenFilter(new StandardFilter(standardTokenizer(buffer)), modCounts[j]);
        stream = new ModuloTokenFilter(standardTokenizer(buffer), modCounts[j]);
        posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
        while (stream.incrementToken()) {
          tfPos += posIncrAtt.getPositionIncrement();

@ -200,7 +199,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
      //simulate one field with one sink
      start = System.currentTimeMillis();
      for (int i = 0; i < 20; i++) {
        teeStream = new TeeSinkTokenFilter(new StandardFilter( standardTokenizer(buffer)));
        teeStream = new TeeSinkTokenFilter(standardTokenizer(buffer));
        sink = new ModuloTokenFilter(teeStream.newSinkTokenStream(), modCounts[j]);
        PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
        while (teeStream.incrementToken()) {

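For readers unfamiliar with the tee/sink pattern exercised by this test, a standalone sketch of the simplified wiring, with the tokenizer feeding the tee directly (class name and token text arbitrary):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Sketch: the tee wraps the tokenizer directly (no StandardFilter wrap),
// caches every token it sees, and the sink replays the cached tokens.
public class TeeSinkSketch {
  public static void main(String[] args) throws IOException {
    WhitespaceTokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("one two three"));

    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(source);
    TokenStream sink = tee.newSinkTokenStream();

    tee.reset();
    tee.consumeAllTokens(); // drains the source, caching token states for the sink
    tee.end();
    tee.close();

    CharTermAttribute term = sink.addAttribute(CharTermAttribute.class);
    sink.reset();
    while (sink.incrementToken()) {
      System.out.println(term); // one, two, three
    }
    sink.end();
    sink.close();
  }
}
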
@ -21,11 +21,8 @@ import java.io.Reader;

import morfologik.stemming.Dictionary;
import morfologik.stemming.polish.PolishStemmer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

/**

@ -60,7 +57,7 @@ public class MorfologikAnalyzer extends Analyzer {
   * @param field ignored field name
   * @return A {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter} and {@link MorfologikFilter}.
   *         {@link MorfologikFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(final String field) {

@ -68,11 +65,7 @@ public class MorfologikAnalyzer extends Analyzer {

    return new TokenStreamComponents(
        src,
        new MorfologikFilter(new StandardFilter(src), dictionary));
        new MorfologikFilter(src, dictionary));
  }

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    return new StandardFilter(in);
  }
}

@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import morfologik.stemming.Dictionary;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;

@ -33,12 +34,9 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.morfologik.MorfologikFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.IOUtils;

import morfologik.stemming.Dictionary;

/**
 * A dictionary-based {@link Analyzer} for Ukrainian.
 */

@ -132,15 +130,14 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
   * @return A
   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link MorfologikFilter} on the Ukrainian dictionary.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    TokenStream result = new LowerCaseFilter(source);
    result = new StopFilter(result, stopwords);

    if (stemExclusionSet.isEmpty() == false) {

@ -26,7 +26,6 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

@ -178,8 +177,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
        keywords.add("liście");

        final Tokenizer src = new StandardTokenizer();
        TokenStream result = new StandardFilter(src);
        result = new SetKeywordMarkerFilter(result, keywords);
        TokenStream result = new SetKeywordMarkerFilter(src, keywords);
        result = new MorfologikFilter(result);

        return new TokenStreamComponents(src, result);

@ -30,7 +30,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;

@ -131,15 +130,14 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
   * @return A
   *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
   *         provided and {@link StempelFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    TokenStream result = new LowerCaseFilter(source);
    result = new StopFilter(result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);

@ -149,8 +147,6 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new StandardFilter(in);
    result = new LowerCaseFilter(result);
    return result;
    return new LowerCaseFilter(in);
  }
}

@ -30,9 +30,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WordlistLoader;

/**
 * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
 * LowerCaseFilter} and {@link StopFilter}, using a configurable list of
 * stop words.
 * Filters {@link StandardTokenizer} with {@link LowerCaseFilter} and
 * {@link StopFilter}, using a configurable list of stop words.
 */
public final class StandardAnalyzer extends StopwordAnalyzerBase {

@ -102,8 +101,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
  protected TokenStreamComponents createComponents(final String fieldName) {
    final StandardTokenizer src = new StandardTokenizer();
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(src);
    tok = new LowerCaseFilter(tok);
    TokenStream tok = new LowerCaseFilter(src);
    tok = new StopFilter(tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override

@ -118,8 +116,6 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new StandardFilter(in);
    result = new LowerCaseFilter(result);
    return result;
    return new LowerCaseFilter(in);
  }
}

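One observable effect of this change is that the normalization chain applied by Analyzer#normalize is now just LowerCaseFilter. A short sketch demonstrating it (class and field names arbitrary):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.BytesRef;

// Sketch: normalize() runs the (now StandardFilter-free) normalize chain,
// which for StandardAnalyzer simply lowercases the input.
public class NormalizeSketch {
  public static void main(String[] args) {
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
      BytesRef ref = analyzer.normalize("f", "QuickFox");
      System.out.println(ref.utf8ToString()); // quickfox
    }
  }
}
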
@ -24,7 +24,10 @@ import org.apache.lucene.analysis.TokenStream;

/**
 * Normalizes tokens extracted with {@link StandardTokenizer}.
 *
 * @deprecated StandardFilter is a no-op and can be removed from code
 */
@Deprecated
public class StandardFilter extends TokenFilter {

  /** Sole constructor */

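Since the class is now a pass-through, its runtime behaviour is equivalent to the following sketch of a do-nothing TokenFilter (not the verbatim source; class name hypothetical):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Sketch of a no-op filter: every token from the wrapped stream is forwarded
// unmodified, which is why it can be deleted from any chain without changing
// the chain's output.
public class NoOpFilter extends TokenFilter {
  public NoOpFilter(TokenStream in) {
    super(in);
  }

  @Override
  public final boolean incrementToken() throws IOException {
    return input.incrementToken(); // forward tokens unchanged
  }
}
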
@ -25,7 +25,6 @@
 * <br>
 * {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
 * {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
 * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
 * {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
 * and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
 */