mirror of https://github.com/apache/lucene.git
LUCENE-9681: Hunspell spellchecker: support numbers with separators (#2224)
This commit is contained in:
parent
053060b925
commit
fdf04d8c63
|
@ -35,11 +35,17 @@ public class SpellChecker {
|
|||
|
||||
/** @return whether the given word's spelling is considered correct according to Hunspell rules */
|
||||
public boolean spell(String word) {
|
||||
if (word.isEmpty()) return true;
|
||||
|
||||
char[] wordChars = word.toCharArray();
|
||||
if (dictionary.isForbiddenWord(wordChars, scratch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isNumber(word)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!stemmer.stem(wordChars, word.length()).isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
|
@ -51,6 +57,28 @@ public class SpellChecker {
|
|||
return false;
|
||||
}
|
||||
|
||||
private static boolean isNumber(String s) {
|
||||
int i = 0;
|
||||
while (i < s.length()) {
|
||||
char c = s.charAt(i);
|
||||
if (isDigit(c)) {
|
||||
i++;
|
||||
} else if (c == '.' || c == ',' || c == '-') {
|
||||
if (i == 0 || i >= s.length() - 1 || !isDigit(s.charAt(i + 1))) {
|
||||
return false;
|
||||
}
|
||||
i += 2;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isDigit(char c) {
|
||||
return c >= '0' && c <= '9';
|
||||
}
|
||||
|
||||
private boolean tryBreaks(String word) {
|
||||
for (String br : dictionary.breaks.starting) {
|
||||
if (word.length() > br.length() && word.startsWith(br)) {
|
||||
|
|
|
@ -23,8 +23,13 @@ import java.nio.file.Path;
|
|||
import java.util.Objects;
|
||||
import org.apache.lucene.store.ByteBuffersDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SpellCheckerTest extends StemmerTestBase {
|
||||
// Runs the "i53643" case, which per the method name covers numbers written with separators.
// NOTE(review): doTest is defined outside this view; presumably it loads the fixture files
// named after its argument and checks the expected good/wrong words — confirm.
@Test
|
||||
public void i53643_numbersWithSeparators() throws Exception {
|
||||
doTest("i53643");
|
||||
}
|
||||
|
||||
public void testBreak() throws Exception {
|
||||
doTest("break");
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# check numbers with separators
|
||||
WORDCHARS 0123456789.-,
|
|
@ -0,0 +1,2 @@
|
|||
1
|
||||
foo
|
|
@ -0,0 +1,21 @@
|
|||
1
|
||||
12
|
||||
123
|
||||
1234
|
||||
12345
|
||||
123456
|
||||
1234567
|
||||
1.1
|
||||
1.12
|
||||
1.123
|
||||
1.1234
|
||||
1.12345
|
||||
1.123456
|
||||
12.1
|
||||
123.12
|
||||
1234.123
|
||||
12345.1234
|
||||
123456.12345
|
||||
1234567.123456
|
||||
4,2
|
||||
42-42
|
|
@ -0,0 +1,4 @@
|
|||
1..2
|
||||
1,,2
|
||||
1.,2
|
||||
1,.2
|
Loading…
Reference in New Issue