hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes (#11960)

hunspell: support empty dictionaries, adapt to the hunspell/C++ repo changes
This commit is contained in:
Peter Gromov 2022-11-22 18:23:45 +01:00 committed by GitHub
parent 0e0a20d88e
commit 2ae8dd632e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 19 additions and 5 deletions

View File

@@ -60,7 +60,7 @@ Improvements
Optimizations
---------------------
- * GITHUB#11857, GITHUB#11859, GITHUB#11893, GITHUB#11909: Hunspell: improved suggestion performance
+ * GITHUB#11857, GITHUB#11859, GITHUB#11893, GITHUB#11909: Hunspell: improved suggestion performance (Peter Gromov)
Bug Fixes
---------------------
@@ -90,6 +90,8 @@ Other
* GITHUB#977, LUCENE-9500: Remove the deflater hack introduced because of JDK-8252739 (Uwe Schindler)
+ * GITHUB#11960: Hunspell: supported empty dictionaries (Peter Gromov)
======================== Lucene 9.5.0 =======================
API Changes

View File

@@ -424,10 +424,13 @@ class WordStorage {
}
WordStorage build() throws IOException {
- assert !group.isEmpty() : "build() should be only called once";
- flushGroup();
+ if (hashTable.length > 0) {
+ assert !group.isEmpty() : "build() should be only called once";
+ flushGroup();
+ }
byte[] trimmedData = ArrayUtil.copyOfSubArray(wordData, 0, dataWriter.getPosition());
- return new WordStorage(maxEntryLength, hasCustomMorphData, hashTable, trimmedData);
+ int[] table = hashTable.length == 0 ? new int[1] : hashTable;
+ return new WordStorage(maxEntryLength, hasCustomMorphData, table, trimmedData);
}
}

View File

@@ -42,7 +42,6 @@ public class TestHunspellRepositoryTestCases {
"hu", // Hungarian is hard: a lot of its rules are hardcoded in Hunspell code, not aff/dic
"morph", // we don't do morphological analysis yet
"opentaal_keepcase", // Hunspell bug: https://github.com/hunspell/hunspell/issues/712
- "forbiddenword", // needs https://github.com/hunspell/hunspell/pull/713 PR to be merged
"nepali", // not supported yet
"utf8_nonbmp", // code points not supported yet
"phone" // not supported yet, used only for suggestions in en_ZA

View File

@@ -37,6 +37,10 @@ import org.apache.lucene.util.IOUtils;
public class TestSpellChecking extends LuceneTestCase {
+ public void testEmpty() throws Exception {
+ doTest("empty");
+ }
public void testBase() throws Exception {
doTest("base");
}

View File

@@ -0,0 +1,2 @@
AF 2000
INVALID something

View File

@@ -0,0 +1,3 @@
everything
is
wrong