mirror of https://github.com/apache/lucene.git
LUCENE-9703: Hunspell: prohibit FORBIDDENWORD words and their case variations (#2254)
This commit is contained in:
parent 4ba78f2ab2
commit 71705c900b
|
@ -57,6 +57,10 @@ public class SpellChecker {
|
||||||
}
|
}
|
||||||
|
|
||||||
char[] wordChars = word.toCharArray();
|
char[] wordChars = word.toCharArray();
|
||||||
|
if (dictionary.isForbiddenWord(wordChars, wordChars.length, scratch)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (checkWord(wordChars, wordChars.length, false)) {
|
if (checkWord(wordChars, wordChars.length, false)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -66,9 +70,7 @@ public class SpellChecker {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictionary.breaks.isNotEmpty()
|
if (dictionary.breaks.isNotEmpty() && !hasTooManyBreakOccurrences(word)) {
|
||||||
&& !hasTooManyBreakOccurrences(word)
|
|
||||||
&& !dictionary.isForbiddenWord(wordChars, word.length(), scratch)) {
|
|
||||||
return tryBreaks(word);
|
return tryBreaks(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.hunspell;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
@ -93,6 +94,10 @@ final class Stemmer {
|
||||||
word = scratchBuffer;
|
word = scratchBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dictionary.isForbiddenWord(word, length, scratch)) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
WordCase wordCase = caseOf(word, length);
|
WordCase wordCase = caseOf(word, length);
|
||||||
List<CharsRef> list = doStem(word, 0, length, false, WordContext.SIMPLE_WORD);
|
List<CharsRef> list = doStem(word, 0, length, false, WordContext.SIMPLE_WORD);
|
||||||
if (wordCase == WordCase.UPPER) {
|
if (wordCase == WordCase.UPPER) {
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Ijs
|
|
@ -41,6 +41,11 @@ public class SpellCheckerTest extends StemmerTestBase {
|
||||||
doTest("allcaps");
|
doTest("allcaps");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void IJ() throws Exception {
|
||||||
|
doTest("IJ");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void i53643_numbersWithSeparators() throws Exception {
|
public void i53643_numbersWithSeparators() throws Exception {
|
||||||
doTest("i53643");
|
doTest("i53643");
|
||||||
|
|
|
@ -27,5 +27,6 @@ public class TestDutchIJ extends StemmerTestBase {
|
||||||
public void testStemming() {
|
public void testStemming() {
|
||||||
assertStemsTo("ijs", "ijs");
|
assertStemsTo("ijs", "ijs");
|
||||||
assertStemsTo("IJs", "ijs");
|
assertStemsTo("IJs", "ijs");
|
||||||
|
assertStemsTo("Ijs");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue