LUCENE-8373: Removed StandardAnalyzer.ENGLISH_STOP_WORDS_SET

2018-06-29 16:02:48 +01:00 · 2018-06-29 16:02:48 +01:00 · ad727bac43
parent 6566b59f36
commit ad727bac43
4 changed files with 9 additions and 47 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -50,6 +50,9 @@ API Changes
 * LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
  (Alan Woodward)

+* LUCENE-8373: StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been removed
+  (Alan Woodward)
+
 Changes in Runtime Behavior

 * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -56,4 +56,9 @@ ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an approp
 ## English stopwords are no longer removed by default in StandardAnalyzer (LUCENE_7444) ##

 To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
-to the constructor
+to the constructor
+
+## StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been moved ##
+
+English stop words are now defined in EnglishAnalyzer#ENGLISH_STOP_WORDS_SET in the
+analysis-common module
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -28,27 +28,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.en.EnglishAnalyzer;

 /** 
 * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
 */
 public final class StopAnalyzer extends StopwordAnalyzerBase {

-  /** An unmodifiable set containing some common English words that are not usually useful
-    for searching.*/
-  @Deprecated
-  public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
-
-  /** Builds an analyzer which removes words in
-   *  {@link #ENGLISH_STOP_WORDS_SET}.
-   * @deprecated Use a constructor with a specific stop word set
-   */
-  @Deprecated
-  public StopAnalyzer() {
-    this(ENGLISH_STOP_WORDS_SET);
-  }
-
  /** Builds an analyzer with the stop words from the given set.
   * @param stopWords Set of stop words */
  public StopAnalyzer(CharArraySet stopWords) {
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -19,8 +19,6 @@ package org.apache.lucene.analysis.standard;

 import java.io.IOException;
 import java.io.Reader;
-import java.util.Arrays;
-import java.util.List;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
@@ -34,41 +32,12 @@ import org.apache.lucene.analysis.WordlistLoader;
 * {@link StopFilter}, using a configurable list of stop words.
 */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {
-
-  /**
-   * An unmodifiable set containing some common English words that are not
-   * usually useful for searching.
-   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
-   */
-  @Deprecated
-  public static final CharArraySet ENGLISH_STOP_WORDS_SET;
-
-  static {
-    final List<String> stopWords = Arrays.asList(
-      "a", "an", "and", "are", "as", "at", "be", "but", "by",
-      "for", "if", "in", "into", "is", "it",
-      "no", "not", "of", "on", "or", "such",
-      "that", "the", "their", "then", "there", "these",
-      "they", "this", "to", "was", "will", "with"
-    );
-    final CharArraySet stopSet = new CharArraySet(stopWords, false);
-    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
-  }
-
  
  /** Default maximum allowed token length */
  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;

  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;

-  /**
-   * An unmodifiable set containing some common English words that are usually not
-   * useful for searching.
-   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
-   */
-  @Deprecated
-  public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
-
  /** Builds an analyzer with the given stop words.
   * @param stopWords stop words */
  public StandardAnalyzer(CharArraySet stopWords) {