LUCENE-9753: Hunspell: disallow compounds with parts present in dictionary, space-separated (#2335)

This commit is contained in:
Peter Gromov 2021-02-10 09:23:15 +01:00 committed by GitHub
parent c3166e1dc3
commit 6f525302dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 22 additions and 2 deletions

View File

@ -261,12 +261,16 @@ public class SpellChecker {
return false;
}
//noinspection RedundantIfStatement
if (dictionary.checkCompoundRep
&& isMisspelledSimpleWord(length + nextPartLength, originalCase)) {
return false;
}
return true;
String spaceSeparated =
new String(tail.chars, tail.offset, length)
+ " "
+ new String(tail.chars, tail.offset + length, nextPartLength);
return !checkWord(spaceSeparated);
}
private boolean isMisspelledSimpleWord(int length, WordCase originalCase) {

View File

@ -132,6 +132,10 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("checkcompoundrep");
}
/**
 * Runs the shared {@code doTest} check against the fixture named "wordpair".
 *
 * <p>NOTE(review): going by the test name, the fixture is expected to show that a compound is
 * rejected when the dictionary lists the same two parts as a space-separated entry. Confirm
 * against the fixture files and {@code doTest}, neither of which is defined in this block.
 *
 * @throws Exception propagated unchanged from {@code doTest}
 */
public void testDisallowCompoundsWhenDictionaryContainsSeparatedWordPair() throws Exception {
doTest("wordpair");
}
/**
 * Runs the shared {@code doTest} check against the fixture named "compoundrule".
 *
 * <p>NOTE(review): presumably this covers compound-rule handling, judging by the fixture name;
 * the fixture contents and {@code doTest} are not visible here, so verify before relying on it.
 *
 * @throws Exception propagated unchanged from {@code doTest}
 */
public void testCompoundrule() throws Exception {
doTest("compoundrule");
}

View File

@ -0,0 +1,4 @@
# a dictionary word pair separated by a space
# prevents the same pair, written without the space,
# from being recognized during compound word analysis
COMPOUNDFLAG Y

View File

@ -0,0 +1,4 @@
3
word/Y
compound/Y
compound word

View File

@ -0,0 +1,3 @@
word
compound
wordcompound