mirror of https://github.com/apache/lucene.git
LUCENE-9753: Hunspell: disallow compounds with parts present in dictionary, space-separated (#2335)
This commit is contained in:
parent
c3166e1dc3
commit
6f525302dd
|
@@ -261,12 +261,16 @@ public class SpellChecker {
|
|||
return false;
|
||||
}
|
||||
|
||||
//noinspection RedundantIfStatement
|
||||
if (dictionary.checkCompoundRep
|
||||
&& isMisspelledSimpleWord(length + nextPartLength, originalCase)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
String spaceSeparated =
|
||||
new String(tail.chars, tail.offset, length)
|
||||
+ " "
|
||||
+ new String(tail.chars, tail.offset + length, nextPartLength);
|
||||
return !checkWord(spaceSeparated);
|
||||
}
|
||||
|
||||
private boolean isMisspelledSimpleWord(int length, WordCase originalCase) {
|
||||
|
|
|
@@ -132,6 +132,10 @@ public class SpellCheckerTest extends StemmerTestBase {
|
|||
doTest("checkcompoundrep");
|
||||
}
|
||||
|
||||
public void testDisallowCompoundsWhenDictionaryContainsSeparatedWordPair() throws Exception {
|
||||
doTest("wordpair");
|
||||
}
|
||||
|
||||
public void testCompoundrule() throws Exception {
|
||||
doTest("compoundrule");
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,4 @@
|
|||
# a dictionary word pair separated by space
|
||||
# will avoid its recognition without space
|
||||
# at compound word analysis
|
||||
COMPOUNDFLAG Y
|
|
@@ -0,0 +1,4 @@
|
|||
3
|
||||
word/Y
|
||||
compound/Y
|
||||
compound word
|
|
@@ -0,0 +1,3 @@
|
|||
word
|
||||
compound
|
||||
wordcompound
|
|
@@ -0,0 +1 @@
|
|||
compoundword
|
Loading…
Reference in New Issue