LUCENE-9721: Hunspell: disallow ONLYINCOMPOUND suffixes at the very end of compound words (#2294)

This commit is contained in:
Peter Gromov 2021-02-03 17:46:54 +01:00 committed by GitHub
parent a79f641561
commit d95e405fec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 31 additions and 0 deletions

View File

@ -568,6 +568,12 @@ final class Stemmer {
if (context != allowed && !dictionary.hasFlag(append, dictionary.compoundPermit, scratch)) {
return false;
}
if (context == WordContext.COMPOUND_END
&& !isPrefix
&& !previousWasPrefix
&& dictionary.hasFlag(append, dictionary.onlyincompound, scratch)) {
return false;
}
}
if (recursionDepth == 0) {

View File

@ -152,6 +152,10 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("compoundrule8");
}
/**
 * Runs the shared {@code doTest} driver on the {@code onlyincompound2} test case. As the method
 * name states, the case is about disallowing compound-only suffixes at the very end of a word.
 */
public void testDisallowCompoundOnlySuffixesAtTheVeryEnd() throws Exception {
  // NOTE(review): doTest presumably resolves test resources from this name; confirm in the base
  // class.
  final String caseName = "onlyincompound2";
  doTest(caseName);
}
/** Runs the shared {@code doTest} driver on the {@code germancompounding} test case. */
public void testGermanCompounding() throws Exception {
  // NOTE(review): doTest presumably resolves test resources from this name; confirm in the base
  // class.
  final String caseName = "germancompounding";
  doTest(caseName);
}

View File

@ -0,0 +1,12 @@
# affixes only in compounds (see also fogemorpheme example)
ONLYINCOMPOUND O
COMPOUNDFLAG A
COMPOUNDPERMITFLAG P
SFX B Y 1
SFX B 0 s/OP .
# make the fogemorpheme obligatory by forbidding the unaffixed stem (0) in compounds
CHECKCOMPOUNDPATTERN 1
CHECKCOMPOUNDPATTERN 0/B /A

View File

@ -0,0 +1,3 @@
2
foo/A
pseudo/AB

View File

@ -0,0 +1,3 @@
foo
foopseudo
pseudosfoo

View File

@ -0,0 +1,3 @@
pseudos
foopseudos
pseudofoo