LUCENE-9681: Hunspell spellchecker: support numbers with separators (#2224)

This commit is contained in:
Peter Gromov 2021-01-21 12:33:03 +01:00 committed by GitHub
parent 053060b925
commit fdf04d8c63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 62 additions and 0 deletions

View File

@ -35,11 +35,17 @@ public class SpellChecker {
/** @return whether the given word's spelling is considered correct according to Hunspell rules */
public boolean spell(String word) {
if (word.isEmpty()) return true;
char[] wordChars = word.toCharArray();
if (dictionary.isForbiddenWord(wordChars, scratch)) {
return false;
}
if (isNumber(word)) {
return true;
}
if (!stemmer.stem(wordChars, word.length()).isEmpty()) {
return true;
}
@ -51,6 +57,28 @@ public class SpellChecker {
return false;
}
/**
 * Checks whether the given string is made up only of ASCII digits, optionally joined by single
 * separator characters ({@code '.'}, {@code ','} or {@code '-'}).
 *
 * <p>A separator is accepted only when it is neither the first nor the last character and is
 * immediately followed by a digit, so inputs such as {@code "1..2"}, {@code ".5"} or {@code "5."}
 * are rejected. An empty string is accepted, because the loop body never runs for it.
 *
 * @param s the text to examine
 * @return {@code true} if every character passes the digit/separator rules above
 */
private static boolean isNumber(String s) {
  final int length = s.length();
  for (int pos = 0; pos < length; pos++) {
    char current = s.charAt(pos);
    if (isDigit(current)) {
      continue;
    }

    boolean separator = current == '.' || current == ',' || current == '-';
    if (!separator) {
      return false;
    }

    // Checked before looking ahead, so that charAt(pos + 1) below can never run out of bounds.
    boolean atEdge = pos == 0 || pos >= length - 1;
    if (atEdge || !isDigit(s.charAt(pos + 1))) {
      return false;
    }

    // The digit right after the separator has just been verified; step over it.
    pos++;
  }
  return true;
}
/**
 * Tells whether the character is one of the ASCII digits {@code '0'} through {@code '9'}.
 * Digits from other scripts are not recognized.
 *
 * @param c the character to test
 * @return {@code true} for {@code '0'}..{@code '9'}, {@code false} for anything else
 */
private static boolean isDigit(char c) {
  // Distance from '0'; only the ten ASCII digits fall into the 0..9 range.
  int offset = c - '0';
  return offset >= 0 && offset <= 9;
}
private boolean tryBreaks(String word) {
for (String br : dictionary.breaks.starting) {
if (word.length() > br.length() && word.startsWith(br)) {

View File

@ -23,8 +23,13 @@ import java.nio.file.Path;
import java.util.Objects;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;
public class SpellCheckerTest extends StemmerTestBase {
/**
 * Runs the shared {@code doTest} helper against the {@code i53643} fixture set, which covers
 * numbers containing separator characters.
 *
 * <p>NOTE(review): {@code doTest} is defined outside this hunk; presumably it loads the
 * {@code i53643} dictionary files and checks the accompanying good/wrong word lists — confirm
 * against the helper.
 */
@Test
public void i53643_numbersWithSeparators() throws Exception {
doTest("i53643");
}
public void testBreak() throws Exception {
doTest("break");

View File

@ -0,0 +1,2 @@
# check numbers with separators
WORDCHARS 0123456789.-,

View File

@ -0,0 +1,21 @@
1
12
123
1234
12345
123456
1234567
1.1
1.12
1.123
1.1234
1.12345
1.123456
12.1
123.12
1234.123
12345.1234
123456.12345
1234567.123456
4,2
42-42

View File

@ -0,0 +1,4 @@
1..2
1,,2
1.,2
1,.2