mirror of https://github.com/apache/lucene.git
LUCENE-10413: Make default Ukrainian stopword set available (#665)
This commit adds a new getDefaultStopwords() static method to UkrainianMorfologikAnalyzer, which makes it possible to create an analyzer with the default stop word set but a custom stem exclusion set.
This commit is contained in:
parent
8178ffda00
commit
2183756f1c
|
@ -267,6 +267,8 @@ Other
|
||||||
and discover classes to check from module system. The test now checks all analyzer modules,
|
and discover classes to check from module system. The test now checks all analyzer modules,
|
||||||
so it may discover new bugs outside of analysis:common module. (Uwe Schindler, Robert Muir)
|
so it may discover new bugs outside of analysis:common module. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-10413: Make the default Ukrainian stop word list available via a public getter. (Alan Woodward)
|
||||||
|
|
||||||
======================= Lucene 9.0.0 =======================
|
======================= Lucene 9.0.0 =======================
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
|
@ -113,14 +113,11 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
|
||||||
return defaultResources;
|
return defaultResources;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class DefaultResources {
|
private record DefaultResources(CharArraySet stopSet, Dictionary dictionary) {}
|
||||||
final CharArraySet stopSet;
|
|
||||||
final Dictionary dictionary;
|
|
||||||
|
|
||||||
private DefaultResources(CharArraySet stopSet, Dictionary dictionary) {
|
/** Returns the default stopword set for this analyzer */
|
||||||
this.stopSet = stopSet;
|
public static CharArraySet getDefaultStopwords() {
|
||||||
this.dictionary = dictionary;
|
return CharArraySet.unmodifiableSet(getDefaultResources().stopSet);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the default stop words. */
|
/** Builds an analyzer with the default stop words. */
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.uk;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
|
||||||
|
|
||||||
/** Test case for UkrainianAnalyzer. */
|
/** Test case for UkrainianAnalyzer. */
|
||||||
|
@ -99,4 +100,11 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
|
||||||
checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER);
|
checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER);
|
||||||
analyzer.close();
|
analyzer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDefaultStopWords() {
|
||||||
|
CharArraySet stopwords = UkrainianMorfologikAnalyzer.getDefaultStopwords();
|
||||||
|
assertTrue(stopwords.contains("аби"));
|
||||||
|
stopwords.remove("аби");
|
||||||
|
assertTrue(UkrainianMorfologikAnalyzer.getDefaultStopwords().contains("аби"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue