mirror of https://github.com/apache/lucene.git
LUCENE-8373: Removed StandardAnalyzer.ENGLISH_STOP_WORDS_SET
This commit is contained in:
parent
6566b59f36
commit
ad727bac43
|
@ -50,6 +50,9 @@ API Changes
|
|||
* LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
|
||||
(Alan Woodward)
|
||||
|
||||
* LUCENE-8373: StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been removed
|
||||
(Alan Woodward)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
|
||||
|
|
|
@ -56,4 +56,9 @@ ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an approp
|
|||
## English stopwords are no longer removed by default in StandardAnalyzer (LUCENE-7444) ##
|
||||
|
||||
To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
|
||||
to the constructor
|
||||
to the constructor
|
||||
|
||||
## StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been moved (LUCENE-8373) ##
|
||||
|
||||
English stop words are now defined in EnglishAnalyzer#ENGLISH_STOP_WORDS_SET in the
|
||||
analysis-common module
|
||||
|
|
|
@ -28,27 +28,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WordlistLoader;
|
||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
||||
|
||||
/**
|
||||
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
|
||||
*/
|
||||
public final class StopAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
/** An unmodifiable set containing some common English words that are not usually useful
|
||||
for searching.*/
|
||||
@Deprecated
|
||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
|
||||
|
||||
/** Builds an analyzer which removes words in
|
||||
* {@link #ENGLISH_STOP_WORDS_SET}.
|
||||
* @deprecated Use a constructor with a specific stop word set
|
||||
*/
|
||||
@Deprecated
|
||||
public StopAnalyzer() {
|
||||
this(ENGLISH_STOP_WORDS_SET);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the stop words from the given set.
|
||||
* @param stopWords Set of stop words */
|
||||
public StopAnalyzer(CharArraySet stopWords) {
|
||||
|
|
|
@ -19,8 +19,6 @@ package org.apache.lucene.analysis.standard;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
|
@ -34,41 +32,12 @@ import org.apache.lucene.analysis.WordlistLoader;
|
|||
* {@link StopFilter}, using a configurable list of stop words.
|
||||
*/
|
||||
public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
/**
|
||||
* An unmodifiable set containing some common English words that are not
|
||||
* usually useful for searching.
|
||||
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
|
||||
*/
|
||||
@Deprecated
|
||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET;
|
||||
|
||||
static {
|
||||
final List<String> stopWords = Arrays.asList(
|
||||
"a", "an", "and", "are", "as", "at", "be", "but", "by",
|
||||
"for", "if", "in", "into", "is", "it",
|
||||
"no", "not", "of", "on", "or", "such",
|
||||
"that", "the", "their", "then", "there", "these",
|
||||
"they", "this", "to", "was", "will", "with"
|
||||
);
|
||||
final CharArraySet stopSet = new CharArraySet(stopWords, false);
|
||||
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
|
||||
}
|
||||
|
||||
|
||||
/** Default maximum allowed token length */
|
||||
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
|
||||
|
||||
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
|
||||
|
||||
/**
|
||||
* An unmodifiable set containing some common English words that are usually not
|
||||
* useful for searching.
|
||||
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
|
||||
*/
|
||||
@Deprecated
|
||||
public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
|
||||
|
||||
/** Builds an analyzer with the given stop words.
|
||||
* @param stopWords stop words */
|
||||
public StandardAnalyzer(CharArraySet stopWords) {
|
||||
|
|
Loading…
Reference in New Issue