mirror of https://github.com/apache/lucene.git
LUCENE-9765: Hunspell: rename SpellChecker to Hunspell, fix test name, update javadoc and CHANGES.txt (#2354)
This commit is contained in:
parent
9905c0cc2d
commit
02ea7a1139
|
@ -89,8 +89,8 @@ API Changes
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-9687: Hunspell support improvements: add SpellChecker API, support default encoding and
|
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
|
||||||
BREAK/FORBIDDENWORD/COMPOUNDRULE affix rules, improve stemming of all-caps words (Peter Gromov)
|
fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss)
|
||||||
|
|
||||||
* LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions).
|
* LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions).
|
||||||
(Dawid Weiss)
|
(Dawid Weiss)
|
||||||
|
|
|
@ -43,9 +43,9 @@ class GeneratingSuggester {
|
||||||
private static final int MAX_WORDS = 100;
|
private static final int MAX_WORDS = 100;
|
||||||
private static final int MAX_GUESSES = 200;
|
private static final int MAX_GUESSES = 200;
|
||||||
private final Dictionary dictionary;
|
private final Dictionary dictionary;
|
||||||
private final SpellChecker speller;
|
private final Hunspell speller;
|
||||||
|
|
||||||
GeneratingSuggester(SpellChecker speller) {
|
GeneratingSuggester(Hunspell speller) {
|
||||||
this.dictionary = speller.dictionary;
|
this.dictionary = speller.dictionary;
|
||||||
this.speller = speller;
|
this.speller = speller;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,15 +34,25 @@ import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A spell checker based on Hunspell dictionaries. The objects of this class are not thread-safe
|
* A spell checker based on Hunspell dictionaries. This class can be used in place of native
|
||||||
* (but a single underlying Dictionary can be shared by multiple spell-checkers in different
|
* Hunspell for many languages for spell-checking and suggesting purposes. Note that not all
|
||||||
* threads). Not all Hunspell features are supported yet.
|
* languages and features are supported yet. For example:
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>Hungarian (as it doesn't only rely on dictionaries, but has some logic directly in the
|
||||||
|
* source code)
|
||||||
|
* <li>Languages with Unicode characters outside of the Basic Multilingual Plane
|
||||||
|
* <li>PHONE affix file option for suggestions
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>The objects of this class are not thread-safe (but a single underlying Dictionary can be
|
||||||
|
* shared by multiple spell-checkers in different threads).
|
||||||
*/
|
*/
|
||||||
public class SpellChecker {
|
public class Hunspell {
|
||||||
final Dictionary dictionary;
|
final Dictionary dictionary;
|
||||||
final Stemmer stemmer;
|
final Stemmer stemmer;
|
||||||
|
|
||||||
public SpellChecker(Dictionary dictionary) {
|
public Hunspell(Dictionary dictionary) {
|
||||||
this.dictionary = dictionary;
|
this.dictionary = dictionary;
|
||||||
stemmer = new Stemmer(dictionary);
|
stemmer = new Stemmer(dictionary);
|
||||||
}
|
}
|
||||||
|
@ -448,8 +458,8 @@ public class SpellChecker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SpellChecker suggestionSpeller =
|
Hunspell suggestionSpeller =
|
||||||
new SpellChecker(dictionary) {
|
new Hunspell(dictionary) {
|
||||||
@Override
|
@Override
|
||||||
boolean acceptsStem(int formID) {
|
boolean acceptsStem(int formID) {
|
||||||
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
return !dictionary.hasFlag(formID, dictionary.noSuggest)
|
|
@ -28,10 +28,10 @@ class ModifyingSuggester {
|
||||||
private static final int MAX_CHAR_DISTANCE = 4;
|
private static final int MAX_CHAR_DISTANCE = 4;
|
||||||
private final LinkedHashSet<String> result = new LinkedHashSet<>();
|
private final LinkedHashSet<String> result = new LinkedHashSet<>();
|
||||||
private final char[] tryChars;
|
private final char[] tryChars;
|
||||||
private final SpellChecker speller;
|
private final Hunspell speller;
|
||||||
boolean hasGoodSuggestions;
|
boolean hasGoodSuggestions;
|
||||||
|
|
||||||
ModifyingSuggester(SpellChecker speller) {
|
ModifyingSuggester(Hunspell speller) {
|
||||||
this.speller = speller;
|
this.speller = speller;
|
||||||
tryChars = speller.dictionary.tryChars.toCharArray();
|
tryChars = speller.dictionary.tryChars.toCharArray();
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,13 +17,11 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Java implementation of <a href="http://hunspell.github.io/">Hunspell</a> stemming and
|
* A Java implementation of <a href="http://hunspell.github.io/">Hunspell</a> stemming and
|
||||||
* spell-checking algorithms, and a stemming TokenFilter based on it.
|
* spell-checking algorithms ({@link org.apache.lucene.analysis.hunspell.Hunspell}), and a stemming
|
||||||
|
* TokenFilter ({@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}) based on it.
|
||||||
*
|
*
|
||||||
* <p>For dictionaries, see e.g. <a href="https://github.com/LibreOffice/dictionaries">LibreOffice
|
* <p>For dictionaries, see e.g. <a href="https://github.com/LibreOffice/dictionaries">LibreOffice
|
||||||
* repository</a> or <a href="https://github.com/wooorm/dictionaries">Titus Wormer's collection
|
* repository</a> or <a href="https://github.com/wooorm/dictionaries">Titus Wormer's collection
|
||||||
* (UTF)</a>
|
* (UTF)</a>
|
||||||
*
|
|
||||||
* @see org.apache.lucene.analysis.hunspell.HunspellStemFilter
|
|
||||||
* @see org.apache.lucene.analysis.hunspell.SpellChecker
|
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.hunspell;
|
package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
|
@ -32,7 +32,7 @@ import org.junit.runner.RunWith;
|
||||||
import org.junit.runners.Parameterized;
|
import org.junit.runners.Parameterized;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Same as {@link SpellCheckerTest}, but checks all Hunspell's test data. The path to the checked
|
* Same as {@link TestSpellChecking}, but checks all Hunspell's test data. The path to the checked
|
||||||
* out Hunspell repository should be in {@code hunspell.repo.path} system property.
|
* out Hunspell repository should be in {@code hunspell.repo.path} system property.
|
||||||
*/
|
*/
|
||||||
@RunWith(Parameterized.class)
|
@RunWith(Parameterized.class)
|
||||||
|
@ -78,7 +78,7 @@ public class TestHunspellRepositoryTestCases {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test() throws Throwable {
|
public void test() throws Throwable {
|
||||||
ThrowingRunnable test = () -> SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix);
|
ThrowingRunnable test = () -> TestSpellChecking.checkSpellCheckerExpectations(pathPrefix);
|
||||||
if (EXPECTED_FAILURES.contains(testName)) {
|
if (EXPECTED_FAILURES.contains(testName)) {
|
||||||
Assert.assertThrows(Throwable.class, test);
|
Assert.assertThrows(Throwable.class, test);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -76,7 +76,7 @@ public class TestPerformance extends LuceneTestCase {
|
||||||
List<String> words = loadWords(code, wordCount, dictionary);
|
List<String> words = loadWords(code, wordCount, dictionary);
|
||||||
|
|
||||||
Stemmer stemmer = new Stemmer(dictionary);
|
Stemmer stemmer = new Stemmer(dictionary);
|
||||||
SpellChecker speller = new SpellChecker(dictionary);
|
Hunspell speller = new Hunspell(dictionary);
|
||||||
measure(
|
measure(
|
||||||
"Stemming " + code,
|
"Stemming " + code,
|
||||||
blackHole -> {
|
blackHole -> {
|
||||||
|
|
|
@ -26,7 +26,7 @@ import java.util.stream.Collectors;
|
||||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
public class SpellCheckerTest extends StemmerTestBase {
|
public class TestSpellChecking extends StemmerTestBase {
|
||||||
|
|
||||||
public void testBase() throws Exception {
|
public void testBase() throws Exception {
|
||||||
doTest("base");
|
doTest("base");
|
||||||
|
@ -221,11 +221,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
||||||
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
|
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
|
||||||
|
|
||||||
SpellChecker speller;
|
Hunspell speller;
|
||||||
try {
|
try {
|
||||||
Dictionary dictionary =
|
Dictionary dictionary =
|
||||||
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
|
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
|
||||||
speller = new SpellChecker(dictionary);
|
speller = new Hunspell(dictionary);
|
||||||
} finally {
|
} finally {
|
||||||
IOUtils.closeWhileHandlingException(affixStream);
|
IOUtils.closeWhileHandlingException(affixStream);
|
||||||
IOUtils.closeWhileHandlingException(dictStream);
|
IOUtils.closeWhileHandlingException(dictStream);
|
Loading…
Reference in New Issue