mirror of https://github.com/apache/lucene.git
LUCENE-9703: Hunspell: prohibit FORBIDDENWORD words and their case variations (#2254)
This commit is contained in:
parent
4ba78f2ab2
commit
71705c900b
|
@ -57,6 +57,10 @@ public class SpellChecker {
|
|||
}
|
||||
|
||||
char[] wordChars = word.toCharArray();
|
||||
if (dictionary.isForbiddenWord(wordChars, wordChars.length, scratch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (checkWord(wordChars, wordChars.length, false)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -66,9 +70,7 @@ public class SpellChecker {
|
|||
return true;
|
||||
}
|
||||
|
||||
if (dictionary.breaks.isNotEmpty()
|
||||
&& !hasTooManyBreakOccurrences(word)
|
||||
&& !dictionary.isForbiddenWord(wordChars, word.length(), scratch)) {
|
||||
if (dictionary.breaks.isNotEmpty() && !hasTooManyBreakOccurrences(word)) {
|
||||
return tryBreaks(word);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.hunspell;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -93,6 +94,10 @@ final class Stemmer {
|
|||
word = scratchBuffer;
|
||||
}
|
||||
|
||||
if (dictionary.isForbiddenWord(word, length, scratch)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
WordCase wordCase = caseOf(word, length);
|
||||
List<CharsRef> list = doStem(word, 0, length, false, WordContext.SIMPLE_WORD);
|
||||
if (wordCase == WordCase.UPPER) {
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Ijs
|
|
@ -41,6 +41,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
doTest("allcaps");
|
||||
}
|
||||
|
||||
/**
 * Runs the shared {@code doTest} check for the test case named "IJ".
 *
 * <p>NOTE(review): {@code doTest} is defined outside this view; presumably it loads the
 * dictionary resources named after the argument and checks which words the spell checker
 * accepts or rejects. Confirm against its definition.
 */
@Test
|
||||
public void IJ() throws Exception {
|
||||
doTest("IJ");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void i53643_numbersWithSeparators() throws Exception {
|
||||
doTest("i53643");
|
||||
|
|
|
@ -27,5 +27,6 @@ public class TestDutchIJ extends StemmerTestBase {
|
|||
public void testStemming() {
|
||||
// Lower-case input is expected to stem to "ijs".
assertStemsTo("ijs", "ijs");
|
||||
// The variant with both leading letters capitalized is also expected to stem to "ijs".
assertStemsTo("IJs", "ijs");
|
||||
// Called with no expected stems: presumably asserts that "Ijs" yields no stems at all.
// NOTE(review): confirm against the assertStemsTo helper, which is not visible here.
assertStemsTo("Ijs");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue