LUCENE-8373: Removed StandardAnalyzer.ENGLISH_STOP_WORDS_SET

2018-06-29 16:02:48 +01:00 · 2018-06-29 16:02:48 +01:00 · ad727bac43
parent 6566b59f36
commit ad727bac43
4 changed files with 9 additions and 47 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -50,6 +50,9 @@ API Changes
 * LUCENE-8356: StandardFilter and StandardFilterFactory have been removed
  (Alan Woodward)
 * LUCENE-8373: StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been removed
  (Alan Woodward)
 Changes in Runtime Behavior
 * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@ -57,3 +57,8 @@ ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an approp
 To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
 to the constructor
 ## StandardAnalyzer.ENGLISH_STOP_WORDS_SET has been moved ##
 English stop words are now defined in EnglishAnalyzer#ENGLISH_STOP_WORDS_SET in the
 analysis-common module.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@ -28,27 +28,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 /** 
 * Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
 */
 public final class StopAnalyzer extends StopwordAnalyzerBase {
  /**
   * An unmodifiable set containing some common English words that are not usually useful
   * for searching. This constant is an alias for
   * {@link EnglishAnalyzer#ENGLISH_STOP_WORDS_SET}.
   * @deprecated Use {@link EnglishAnalyzer#ENGLISH_STOP_WORDS_SET} directly
   */
  @Deprecated
  public static final CharArraySet ENGLISH_STOP_WORDS_SET = EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
  /**
   * Builds an analyzer which removes words in {@link #ENGLISH_STOP_WORDS_SET}.
   * Delegates to {@link #StopAnalyzer(CharArraySet)} with that set.
   * @deprecated Use a constructor with a specific stop word set, e.g.
   *             {@link #StopAnalyzer(CharArraySet)}
   */
  @Deprecated
  public StopAnalyzer() {
    this(ENGLISH_STOP_WORDS_SET);
  }
  /** Builds an analyzer with the stop words from the given set.
   * @param stopWords Set of stop words */
  public StopAnalyzer(CharArraySet stopWords) {
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@ -19,8 +19,6 @@ package org.apache.lucene.analysis.standard;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
@ -35,40 +33,11 @@ import org.apache.lucene.analysis.WordlistLoader;
 */
 public final class StandardAnalyzer extends StopwordAnalyzerBase {
  /**
   * An unmodifiable set containing some common English words that are not
   * usually useful for searching. Populated once by the static initializer
   * that follows this declaration.
   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
   */
  @Deprecated
  public static final CharArraySet ENGLISH_STOP_WORDS_SET;

  // Builds the stop word constant when the class is initialized.
  static {
    final List<String> stopWords = Arrays.asList(
      "a", "an", "and", "are", "as", "at", "be", "but", "by",
      "for", "if", "in", "into", "is", "it",
      "no", "not", "of", "on", "or", "such",
      "that", "the", "their", "then", "there", "these",
      "they", "this", "to", "was", "will", "with"
    );
    // NOTE(review): the second argument is presumably CharArraySet's ignoreCase flag
    // (false = case-sensitive matching) -- confirm against the CharArraySet constructor.
    final CharArraySet stopSet = new CharArraySet(stopWords, false);
    // Expose only the unmodifiable wrapper; the mutable stopSet never leaves this block.
    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
  }
  /** Default maximum allowed token length */
  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
  /** Maximum token length for this analyzer instance; starts at {@link #DEFAULT_MAX_TOKEN_LENGTH}. */
  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
  /**
   * An unmodifiable set containing some common English words that are usually not
   * useful for searching. This is the same object as {@link #ENGLISH_STOP_WORDS_SET},
   * exposed under a second name.
   * @deprecated Use the stop words on EnglishAnalyzer in the analysis-common module
   */
  @Deprecated
  public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
  /** Builds an analyzer with the given stop words.
   * @param stopWords stop words */
  public StandardAnalyzer(CharArraySet stopWords) {