hunspell: disallow hidden title-case entries from compound middle/end (#12220)

If the dictionary only has the custom-case entry uART and the all-upper-case entry UART, we shouldn't accept StandUart as a compound (although we still keep hidden title-case "Uart" dictionary entries for internal purposes).
This commit is contained in:
Peter Gromov 2023-04-03 20:06:58 +02:00 committed by GitHub
parent 56e65919b1
commit 56aef7265a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 68 additions and 2 deletions

View File

@ -164,7 +164,7 @@ public class Hunspell {
Root<CharsRef> findStem( Root<CharsRef> findStem(
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) { char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
checkCanceled.run(); checkCanceled.run();
boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END; WordCase toCheck = context != COMPOUND_MIDDLE && context != COMPOUND_END ? originalCase : null;
@SuppressWarnings({"rawtypes", "unchecked"}) @SuppressWarnings({"rawtypes", "unchecked"})
Root<CharsRef>[] result = new Root[1]; Root<CharsRef>[] result = new Root[1];
stemmer.doStem( stemmer.doStem(
@ -173,7 +173,7 @@ public class Hunspell {
length, length,
context, context,
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> { (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
if (checkCase && !acceptCase(originalCase, formID, stem)) { if (!acceptCase(toCheck, formID, stem)) {
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG); return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
} }
if (acceptsStem(formID)) { if (acceptsStem(formID)) {

View File

@ -205,6 +205,10 @@ public class TestSpellChecking extends LuceneTestCase {
doTest("germancompounding"); doTest("germancompounding");
} }
/**
 * Runs the shared {@code doTest} check for the test resources named "germanManualCase".
 *
 * <p>NOTE(review): presumably this is the regression test for rejecting hidden title-case
 * entries in the middle/end of a compound (e.g. "StandUart"); confirm against the
 * germanManualCase fixture files.
 */
public void testGermanManualCase() throws Exception {
doTest("germanManualCase");
}
public void testApplyOconvToSuggestions() throws Exception { public void testApplyOconvToSuggestions() throws Exception {
doTest("oconv"); doTest("oconv");
} }

View File

@ -0,0 +1,51 @@
# no CHECKCOMPOUNDCASE
# compound flags
COMPOUNDBEGIN U
COMPOUNDMIDDLE V
COMPOUNDEND W
ONLYINCOMPOUND X
COMPOUNDPERMITFLAG P
COMPOUNDMIN 1
WORDCHARS -
# dash prefix for compounds with dash (Arbeits-Computer)
PFX - Y 1
PFX - 0 -/P .
# decapitalizing prefix
PFX D Y 29
PFX D A a/PX A
PFX D Ä ä/PX Ä
PFX D B b/PX B
PFX D C c/PX C
PFX D D d/PX D
PFX D E e/PX E
PFX D F f/PX F
PFX D G g/PX G
PFX D H h/PX H
PFX D I i/PX I
PFX D J j/PX J
PFX D K k/PX K
PFX D L l/PX L
PFX D M m/PX M
PFX D N n/PX N
PFX D O o/PX O
PFX D Ö ö/PX Ö
PFX D P p/PX P
PFX D Q q/PX Q
PFX D R r/PX R
PFX D S s/PX S
PFX D T t/PX T
PFX D U u/PX U
PFX D Ü ü/PX Ü
PFX D V v/PX V
PFX D W w/PX W
PFX D X x/PX X
PFX D Y y/PX Y
PFX D Z z/PX Z

View File

@ -0,0 +1,5 @@
4
uART/XW-
bein/XW-
Stand/UX
UART/-

View File

@ -0,0 +1,3 @@
UART
Standbein
Stand-uART

View File

@ -0,0 +1,3 @@
StandUart
uART
Uart