LUCENE-9783: Hunspell: don't suggest more than 4 ngram corrections by default (#2388)

This commit is contained in:
Peter Gromov 2021-02-18 09:27:06 +01:00 committed by GitHub
parent f83c9862e8
commit f879c6ad84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 21 additions and 2 deletions

View File

@@ -166,7 +166,7 @@ public class Dictionary {
List<RepEntry> repTable = new ArrayList<>();
List<List<String>> mapTable = new ArrayList<>();
int maxDiff = 5;
- int maxNGramSuggestions = Integer.MAX_VALUE;
+ int maxNGramSuggestions = 4;
boolean onlyMaxDiff;
char noSuggest, subStandard;
ConvTable iconv, oconv;

View File

@@ -298,7 +298,7 @@ class GeneratingSuggester {
&& result.stream().noneMatch(weighted.word::contains)
&& speller.checkWord(weighted.word)) {
result.add(weighted.word);
- if (result.size() > dictionary.maxNGramSuggestions) {
+ if (result.size() >= dictionary.maxNGramSuggestions) {
break;
}
}

View File

@@ -200,6 +200,10 @@ public class TestSpellChecking extends StemmerTestBase {
doTest("sug2");
}
+ public void testMaxNGramSugsDefaultIsNotUnlimited() throws Exception {
+ doTest("maxNGramSugsDefault");
+ }
public void testMixedCaseSuggestionHeuristics() throws Exception {
doTest("i58202");
}

View File

@@ -0,0 +1,13 @@
12
cryptography
cryptographer
crystallographic
cartographic
photographic
typographic
cryptogram
topographic
orthographic
tomographic
phonographic
pictograph

View File

@@ -0,0 +1 @@
cryptography, cryptographer, crystallographic, cartographic