LUCENE-9765: Hunspell: rename SpellChecker to Hunspell, fix test name, update javadoc and CHANGES.txt (#2354)

This commit is contained in:
Peter Gromov 2021-02-12 15:44:36 +01:00 committed by GitHub
parent 9905c0cc2d
commit 02ea7a1139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 31 additions and 23 deletions

View File

@ -89,8 +89,8 @@ API Changes
Improvements Improvements
* LUCENE-9687: Hunspell support improvements: add SpellChecker API, support default encoding and * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
BREAK/FORBIDDENWORD/COMPOUNDRULE affix rules, improve stemming of all-caps words (Peter Gromov) fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss)
* LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions). * LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions).
(Dawid Weiss) (Dawid Weiss)

View File

@ -43,9 +43,9 @@ class GeneratingSuggester {
private static final int MAX_WORDS = 100; private static final int MAX_WORDS = 100;
private static final int MAX_GUESSES = 200; private static final int MAX_GUESSES = 200;
private final Dictionary dictionary; private final Dictionary dictionary;
private final SpellChecker speller; private final Hunspell speller;
GeneratingSuggester(SpellChecker speller) { GeneratingSuggester(Hunspell speller) {
this.dictionary = speller.dictionary; this.dictionary = speller.dictionary;
this.speller = speller; this.speller = speller;
} }

View File

@ -34,15 +34,25 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
/** /**
* A spell checker based on Hunspell dictionaries. The objects of this class are not thread-safe * A spell checker based on Hunspell dictionaries. This class can be used in place of native
* (but a single underlying Dictionary can be shared by multiple spell-checkers in different * Hunspell for many languages for spell-checking and suggesting purposes. Note that not all
* threads). Not all Hunspell features are supported yet. * languages are supported yet. For example:
*
* <ul>
* <li>Hungarian (as it doesn't only rely on dictionaries, but has some logic directly in the
* source code)
* <li>Languages with Unicode characters outside of the Basic Multilingual Plane
* <li>PHONE affix file option for suggestions
* </ul>
*
* <p>The objects of this class are not thread-safe (but a single underlying Dictionary can be
* shared by multiple spell-checkers in different threads).
*/ */
public class SpellChecker { public class Hunspell {
final Dictionary dictionary; final Dictionary dictionary;
final Stemmer stemmer; final Stemmer stemmer;
public SpellChecker(Dictionary dictionary) { public Hunspell(Dictionary dictionary) {
this.dictionary = dictionary; this.dictionary = dictionary;
stemmer = new Stemmer(dictionary); stemmer = new Stemmer(dictionary);
} }
@ -448,8 +458,8 @@ public class SpellChecker {
} }
} }
SpellChecker suggestionSpeller = Hunspell suggestionSpeller =
new SpellChecker(dictionary) { new Hunspell(dictionary) {
@Override @Override
boolean acceptsStem(int formID) { boolean acceptsStem(int formID) {
return !dictionary.hasFlag(formID, dictionary.noSuggest) return !dictionary.hasFlag(formID, dictionary.noSuggest)

View File

@ -28,10 +28,10 @@ class ModifyingSuggester {
private static final int MAX_CHAR_DISTANCE = 4; private static final int MAX_CHAR_DISTANCE = 4;
private final LinkedHashSet<String> result = new LinkedHashSet<>(); private final LinkedHashSet<String> result = new LinkedHashSet<>();
private final char[] tryChars; private final char[] tryChars;
private final SpellChecker speller; private final Hunspell speller;
boolean hasGoodSuggestions; boolean hasGoodSuggestions;
ModifyingSuggester(SpellChecker speller) { ModifyingSuggester(Hunspell speller) {
this.speller = speller; this.speller = speller;
tryChars = speller.dictionary.tryChars.toCharArray(); tryChars = speller.dictionary.tryChars.toCharArray();
} }

View File

@ -17,13 +17,11 @@
/** /**
* A Java implementation of <a href="http://hunspell.github.io/">Hunspell</a> stemming and * A Java implementation of <a href="http://hunspell.github.io/">Hunspell</a> stemming and
* spell-checking algorithms, and a stemming TokenFilter based on it. * spell-checking algorithms ({@link org.apache.lucene.analysis.hunspell.Hunspell}), and a stemming
* TokenFilter ({@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}) based on it.
* *
* <p>For dictionaries, see e.g. <a href="https://github.com/LibreOffice/dictionaries">LibreOffice * <p>For dictionaries, see e.g. <a href="https://github.com/LibreOffice/dictionaries">LibreOffice
* repository</a> or <a href="https://github.com/wooorm/dictionaries">Titus Wormer's collection * repository</a> or <a href="https://github.com/wooorm/dictionaries">Titus Wormer's collection
* (UTF)</a> * (UTF)</a>
*
* @see org.apache.lucene.analysis.hunspell.HunspellStemFilter
* @see org.apache.lucene.analysis.hunspell.SpellChecker
*/ */
package org.apache.lucene.analysis.hunspell; package org.apache.lucene.analysis.hunspell;

View File

@ -32,7 +32,7 @@ import org.junit.runner.RunWith;
import org.junit.runners.Parameterized; import org.junit.runners.Parameterized;
/** /**
* Same as {@link SpellCheckerTest}, but checks all Hunspell's test data. The path to the checked * Same as {@link TestSpellChecking}, but checks all Hunspell's test data. The path to the checked
* out Hunspell repository should be in {@code hunspell.repo.path} system property. * out Hunspell repository should be in {@code hunspell.repo.path} system property.
*/ */
@RunWith(Parameterized.class) @RunWith(Parameterized.class)
@ -78,7 +78,7 @@ public class TestHunspellRepositoryTestCases {
@Test @Test
public void test() throws Throwable { public void test() throws Throwable {
ThrowingRunnable test = () -> SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix); ThrowingRunnable test = () -> TestSpellChecking.checkSpellCheckerExpectations(pathPrefix);
if (EXPECTED_FAILURES.contains(testName)) { if (EXPECTED_FAILURES.contains(testName)) {
Assert.assertThrows(Throwable.class, test); Assert.assertThrows(Throwable.class, test);
} else { } else {

View File

@ -76,7 +76,7 @@ public class TestPerformance extends LuceneTestCase {
List<String> words = loadWords(code, wordCount, dictionary); List<String> words = loadWords(code, wordCount, dictionary);
Stemmer stemmer = new Stemmer(dictionary); Stemmer stemmer = new Stemmer(dictionary);
SpellChecker speller = new SpellChecker(dictionary); Hunspell speller = new Hunspell(dictionary);
measure( measure(
"Stemming " + code, "Stemming " + code,
blackHole -> { blackHole -> {

View File

@ -26,7 +26,7 @@ import java.util.stream.Collectors;
import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
public class SpellCheckerTest extends StemmerTestBase { public class TestSpellChecking extends StemmerTestBase {
public void testBase() throws Exception { public void testBase() throws Exception {
doTest("base"); doTest("base");
@ -221,11 +221,11 @@ public class SpellCheckerTest extends StemmerTestBase {
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff")); InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic")); InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
SpellChecker speller; Hunspell speller;
try { try {
Dictionary dictionary = Dictionary dictionary =
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream); new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
speller = new SpellChecker(dictionary); speller = new Hunspell(dictionary);
} finally { } finally {
IOUtils.closeWhileHandlingException(affixStream); IOUtils.closeWhileHandlingException(affixStream);
IOUtils.closeWhileHandlingException(dictStream); IOUtils.closeWhileHandlingException(dictStream);