mirror of https://github.com/apache/lucene.git
LUCENE-7444: StandardAnalyzer no longer uses English stopwords by default
This commit is contained in:
parent
7eb74ac50f
commit
5ae716c412
|
@ -73,6 +73,9 @@ Changes in Runtime Behavior
|
|||
* LUCENE-8031: Length normalization correctly reflects omission of term frequencies.
|
||||
(Robert Muir, Adrien Grand)
|
||||
|
||||
* LUCENE-7444: StandardAnalyzer no longer defaults to removing English stopwords
|
||||
(Alan Woodward)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.
|
||||
|
|
|
@ -52,3 +52,8 @@ Memory codecs have been removed from the codebase (MemoryPostings, MemoryDocValu
|
|||
|
||||
Caching everything is discouraged as it disables the ability to skip non-interesting documents.
|
||||
ALWAYS_CACHE can be replaced by a UsageTrackingQueryCachingPolicy with an appropriate config.
|
||||
|
||||
## English stopwords are no longer removed by default in StandardAnalyzer (LUCENE-7444) ##
|
||||
|
||||
To retain the old behaviour, pass StandardAnalyzer.ENGLISH_STOP_WORDS_SET as an argument
|
||||
to the constructor.
|
|
@ -31,8 +31,8 @@ import org.apache.lucene.analysis.WordlistLoader;
|
|||
|
||||
/**
|
||||
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
|
||||
* LowerCaseFilter} and {@link StopFilter}, using a list of
|
||||
* English stop words.
|
||||
* LowerCaseFilter} and {@link StopFilter}, using a configurable list of
|
||||
* stop words.
|
||||
*/
|
||||
public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
||||
|
||||
|
@ -67,10 +67,10 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
|
|||
super(stopWords);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
|
||||
/** Builds an analyzer with no stop words.
|
||||
*/
|
||||
public StandardAnalyzer() {
|
||||
this(STOP_WORDS_SET);
|
||||
this(CharArraySet.EMPTY_SET);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the stop words from the given reader.
|
||||
|
|
Loading…
Reference in New Issue