mirror of https://github.com/apache/lucene.git
LUCENE-10413: Make default Ukrainian stopword set available (#665)
This commit adds a new getDefaultStopwords() static method to UkrainianMorfologikAnalyzer, which makes it possible to create an analyzer with the default stop word set but a custom stem exclusion set.
This commit is contained in:
parent
8178ffda00
commit
2183756f1c
|
@ -267,6 +267,8 @@ Other
|
|||
and discover classes to check from module system. The test now checks all analyzer modules,
|
||||
so it may discover new bugs outside of analysis:common module. (Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-10413: Make Ukrainian default stop words list available as a public getter. (Alan Woodward)
|
||||
|
||||
======================= Lucene 9.0.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -113,14 +113,11 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
|
|||
return defaultResources;
|
||||
}
|
||||
|
||||
private static class DefaultResources {
|
||||
final CharArraySet stopSet;
|
||||
final Dictionary dictionary;
|
||||
private record DefaultResources(CharArraySet stopSet, Dictionary dictionary) {}
|
||||
|
||||
private DefaultResources(CharArraySet stopSet, Dictionary dictionary) {
|
||||
this.stopSet = stopSet;
|
||||
this.dictionary = dictionary;
|
||||
}
|
||||
/** Returns the default stopword set for this analyzer */
|
||||
public static CharArraySet getDefaultStopwords() {
|
||||
return CharArraySet.unmodifiableSet(getDefaultResources().stopSet);
|
||||
}
|
||||
|
||||
/** Builds an analyzer with the default stop words. */
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.uk;
|
|||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
/** Test case for UkrainianAnalyzer. */
|
||||
|
@ -99,4 +100,11 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
|
|||
checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
public void testDefaultStopWords() {
|
||||
CharArraySet stopwords = UkrainianMorfologikAnalyzer.getDefaultStopwords();
|
||||
assertTrue(stopwords.contains("аби"));
|
||||
stopwords.remove("аби");
|
||||
assertTrue(UkrainianMorfologikAnalyzer.getDefaultStopwords().contains("аби"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue