LUCENE-8373: Removed StandardAnalyzer.ENGLISH_STOP_WORDS_SET

This commit is contained in:
Alan Woodward 2018-06-29 16:02:48 +01:00
parent 6566b59f36
commit ad727bac43
4 changed files with 9 additions and 47 deletions

View File

@ -50,6 +50,9 @@ API Changes
* LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
(Alan Woodward)
* LUCENE-8373: StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been removed
(Alan Woodward)
Changes in Runtime Behavior
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of

View File

@ -56,4 +56,9 @@ ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an approp
## English stopwords are no longer removed by default in StandardAnalyzer (LUCENE-7444) ##
To retain the old behaviour, pass EnglishAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
to the constructor
to the constructor
## StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been moved ##
English stop words are now defined in EnglishAnalyzer#ENGLISH_STOP_WORDS_SET in the
analysis-common module

View File

@ -28,27 +28,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
/**
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
*/
public final class StopAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are not usually useful
for searching.*/
@Deprecated
public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS_SET}.
* @deprecated Use a constructor with a specific stop word set
*/
@Deprecated
public StopAnalyzer() {
this(ENGLISH_STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given set.
* @param stopWords Set of stop words */
public StopAnalyzer(CharArraySet stopWords) {

View File

@ -19,8 +19,6 @@ package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
@ -34,41 +32,12 @@ import org.apache.lucene.analysis.WordlistLoader;
* {@link StopFilter}, using a configurable list of stop words.
*/
public final class StandardAnalyzer extends StopwordAnalyzerBase {
/**
* An unmodifiable set containing some common English words that are not
* usually useful for searching.
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
*/
@Deprecated
public static final CharArraySet ENGLISH_STOP_WORDS_SET;
static {
final List<String> stopWords = Arrays.asList(
"a", "an", "and", "are", "as", "at", "be", "but", "by",
"for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "such",
"that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
);
final CharArraySet stopSet = new CharArraySet(stopWords, false);
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
}
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
/**
* An unmodifiable set containing some common English words that are usually not
* useful for searching.
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
*/
@Deprecated
public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
* @param stopWords stop words */
public StandardAnalyzer(CharArraySet stopWords) {