mirror of https://github.com/apache/lucene.git
LUCENE-9765: Hunspell: rename SpellChecker to Hunspell, fix test name, update javadoc and CHANGES.txt (#2354)
This commit is contained in:
parent
9905c0cc2d
commit
02ea7a1139
|
@ -89,8 +89,8 @@ API Changes
|
|||
|
||||
Improvements
|
||||
|
||||
* LUCENE-9687: Hunspell support improvements: add SpellChecker API, support default encoding and
|
||||
BREAK/FORBIDDENWORD/COMPOUNDRULE affix rules, improve stemming of all-caps words (Peter Gromov)
|
||||
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
|
||||
fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss)
|
||||
|
||||
* LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions).
|
||||
(Dawid Weiss)
|
||||
|
|
|
@ -43,9 +43,9 @@ class GeneratingSuggester {
|
|||
private static final int MAX_WORDS = 100;
|
||||
private static final int MAX_GUESSES = 200;
|
||||
private final Dictionary dictionary;
|
||||
private final SpellChecker speller;
|
||||
private final Hunspell speller;
|
||||
|
||||
GeneratingSuggester(SpellChecker speller) {
|
||||
GeneratingSuggester(Hunspell speller) {
|
||||
this.dictionary = speller.dictionary;
|
||||
this.speller = speller;
|
||||
}
|
||||
|
|
|
@ -34,15 +34,25 @@ import org.apache.lucene.util.CharsRef;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/**
|
||||
* A spell checker based on Hunspell dictionaries. The objects of this class are not thread-safe
|
||||
* (but a single underlying Dictionary can be shared by multiple spell-checkers in different
|
||||
* threads). Not all Hunspell features are supported yet.
|
||||
* A spell checker based on Hunspell dictionaries. This class can be used in place of native
|
||||
* Hunspell for many languages for spell-checking and suggesting purposes. Note that not all
|
||||
* languages are supported yet. For example:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Hungarian (as it doesn't only rely on dictionaries, but has some logic directly in the
|
||||
* source code)
|
||||
* <li>Languages with Unicode characters outside of the Basic Multilingual Plane
|
||||
* <li>Languages whose dictionaries rely on the PHONE affix file option for suggestions
|
||||
* </ul>
|
||||
*
|
||||
* <p>The objects of this class are not thread-safe (but a single underlying Dictionary can be
|
||||
* shared by multiple spell-checkers in different threads).
|
||||
*/
|
||||
public class SpellChecker {
|
||||
public class Hunspell {
|
||||
final Dictionary dictionary;
|
||||
final Stemmer stemmer;
|
||||
|
||||
public SpellChecker(Dictionary dictionary) {
|
||||
public Hunspell(Dictionary dictionary) {
|
||||
this.dictionary = dictionary;
|
||||
stemmer = new Stemmer(dictionary);
|
||||
}
|
||||
|
@ -448,8 +458,8 @@ public class SpellChecker {
|
|||
}
|
||||
}
|
||||
|
||||
SpellChecker suggestionSpeller =
|
||||
new SpellChecker(dictionary) {
|
||||
Hunspell suggestionSpeller =
|
||||
new Hunspell(dictionary) {
|
||||
@Override
|
||||
boolean acceptsStem(int formID) {
|
||||
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
|
@ -28,10 +28,10 @@ class ModifyingSuggester {
|
|||
private static final int MAX_CHAR_DISTANCE = 4;
|
||||
private final LinkedHashSet<String> result = new LinkedHashSet<>();
|
||||
private final char[] tryChars;
|
||||
private final SpellChecker speller;
|
||||
private final Hunspell speller;
|
||||
boolean hasGoodSuggestions;
|
||||
|
||||
ModifyingSuggester(SpellChecker speller) {
|
||||
ModifyingSuggester(Hunspell speller) {
|
||||
this.speller = speller;
|
||||
tryChars = speller.dictionary.tryChars.toCharArray();
|
||||
}
|
||||
|
|
|
@ -17,13 +17,11 @@
|
|||
|
||||
/**
|
||||
* A Java implementation of <a href="http://hunspell.github.io/">Hunspell</a> stemming and
|
||||
* spell-checking algorithms, and a stemming TokenFilter based on it.
|
||||
* spell-checking algorithms ({@link org.apache.lucene.analysis.hunspell.Hunspell}), and a stemming
|
||||
* TokenFilter ({@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}) based on it.
|
||||
*
|
||||
* <p>For dictionaries, see e.g. <a href="https://github.com/LibreOffice/dictionaries">LibreOffice
|
||||
* repository</a> or <a href="https://github.com/wooorm/dictionaries">Titus Wormer's collection
|
||||
* (UTF)</a>
|
||||
*
|
||||
* @see org.apache.lucene.analysis.hunspell.HunspellStemFilter
|
||||
* @see org.apache.lucene.analysis.hunspell.SpellChecker
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.junit.runner.RunWith;
|
|||
import org.junit.runners.Parameterized;
|
||||
|
||||
/**
|
||||
* Same as {@link SpellCheckerTest}, but checks all Hunspell's test data. The path to the checked
|
||||
* Same as {@link TestSpellChecking}, but checks all Hunspell's test data. The path to the checked
|
||||
* out Hunspell repository should be in {@code hunspell.repo.path} system property.
|
||||
*/
|
||||
@RunWith(Parameterized.class)
|
||||
|
@ -78,7 +78,7 @@ public class TestHunspellRepositoryTestCases {
|
|||
|
||||
@Test
|
||||
public void test() throws Throwable {
|
||||
ThrowingRunnable test = () -> SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix);
|
||||
ThrowingRunnable test = () -> TestSpellChecking.checkSpellCheckerExpectations(pathPrefix);
|
||||
if (EXPECTED_FAILURES.contains(testName)) {
|
||||
Assert.assertThrows(Throwable.class, test);
|
||||
} else {
|
||||
|
|
|
@ -76,7 +76,7 @@ public class TestPerformance extends LuceneTestCase {
|
|||
List<String> words = loadWords(code, wordCount, dictionary);
|
||||
|
||||
Stemmer stemmer = new Stemmer(dictionary);
|
||||
SpellChecker speller = new SpellChecker(dictionary);
|
||||
Hunspell speller = new Hunspell(dictionary);
|
||||
measure(
|
||||
"Stemming " + code,
|
||||
blackHole -> {
|
||||
|
|
|
@ -26,7 +26,7 @@ import java.util.stream.Collectors;
|
|||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
public class SpellCheckerTest extends StemmerTestBase {
|
||||
public class TestSpellChecking extends StemmerTestBase {
|
||||
|
||||
public void testBase() throws Exception {
|
||||
doTest("base");
|
||||
|
@ -221,11 +221,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
||||
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
|
||||
|
||||
SpellChecker speller;
|
||||
Hunspell speller;
|
||||
try {
|
||||
Dictionary dictionary =
|
||||
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
|
||||
speller = new SpellChecker(dictionary);
|
||||
speller = new Hunspell(dictionary);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affixStream);
|
||||
IOUtils.closeWhileHandlingException(dictStream);
|
Loading…
Reference in New Issue