mirror of https://github.com/apache/lucene.git
LUCENE-8373: Removed StandardAnalyzer.ENGLISH_STOP_WORDS_SET
This commit is contained in:
parent
6566b59f36
commit
ad727bac43
|
@ -50,6 +50,9 @@ API Changes
|
||||||
* LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
|
* LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
|
||||||
(Alan Woodward)
|
(Alan Woodward)
|
||||||
|
|
||||||
|
* LUCENE-8373: StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been removed
|
||||||
|
(Alan Woodward)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
|
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
|
||||||
|
|
|
@ -57,3 +57,8 @@ ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an approp
|
||||||
|
|
||||||
To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
|
To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
|
||||||
to the constructor
|
to the constructor
|
||||||
|
|
||||||
|
## StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been moved ##
|
||||||
|
|
||||||
|
English stop words are now defined in EnglishAnalyzer#ENGLISH_STOP_WORDS_SET in the
|
||||||
|
analysis-common module
|
||||||
|
|
|
@ -28,27 +28,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.WordlistLoader;
|
import org.apache.lucene.analysis.WordlistLoader;
|
||||||
import org.apache.lucene.analysis.en.EnglishAnalyzer;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
|
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
|
||||||
*/
|
*/
|
||||||
public final class StopAnalyzer extends StopwordAnalyzerBase {
|
public final class StopAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/** An unmodifiable set containing some common English words that are not usually useful
|
|
||||||
for searching.*/
|
|
||||||
@Deprecated
|
|
||||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
|
|
||||||
|
|
||||||
/** Builds an analyzer which removes words in
|
|
||||||
* {@link #ENGLISH_STOP_WORDS_SET}.
|
|
||||||
* @deprecated Use a constructor with a specific stop word set
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public StopAnalyzer() {
|
|
||||||
this(ENGLISH_STOP_WORDS_SET);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Builds an analyzer with the stop words from the given set.
|
/** Builds an analyzer with the stop words from the given set.
|
||||||
* @param stopWords Set of stop words */
|
* @param stopWords Set of stop words */
|
||||||
public StopAnalyzer(CharArraySet stopWords) {
|
public StopAnalyzer(CharArraySet stopWords) {
|
||||||
|
|
|
@ -19,8 +19,6 @@ package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
|
@ -35,40 +33,11 @@ import org.apache.lucene.analysis.WordlistLoader;
|
||||||
*/
|
*/
|
||||||
public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
||||||
|
|
||||||
/**
|
|
||||||
* An unmodifiable set containing some common English words that are not
|
|
||||||
* usually useful for searching.
|
|
||||||
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public static final CharArraySet ENGLISH_STOP_WORDS_SET;
|
|
||||||
|
|
||||||
static {
|
|
||||||
final List<String> stopWords = Arrays.asList(
|
|
||||||
"a", "an", "and", "are", "as", "at", "be", "but", "by",
|
|
||||||
"for", "if", "in", "into", "is", "it",
|
|
||||||
"no", "not", "of", "on", "or", "such",
|
|
||||||
"that", "the", "their", "then", "there", "these",
|
|
||||||
"they", "this", "to", "was", "will", "with"
|
|
||||||
);
|
|
||||||
final CharArraySet stopSet = new CharArraySet(stopWords, false);
|
|
||||||
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** Default maximum allowed token length */
|
/** Default maximum allowed token length */
|
||||||
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
|
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
|
||||||
|
|
||||||
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
|
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
|
||||||
|
|
||||||
/**
|
|
||||||
* An unmodifiable set containing some common English words that are usually not
|
|
||||||
* useful for searching.
|
|
||||||
* @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
|
|
||||||
|
|
||||||
/** Builds an analyzer with the given stop words.
|
/** Builds an analyzer with the given stop words.
|
||||||
* @param stopWords stop words */
|
* @param stopWords stop words */
|
||||||
public StandardAnalyzer(CharArraySet stopWords) {
|
public StandardAnalyzer(CharArraySet stopWords) {
|
||||||
|
|
Loading…
Reference in New Issue