LUCENE-9785: Hunspell: don't check case in compound middle and end (#2398)

This commit is contained in:
Peter Gromov 2021-02-19 20:16:39 +01:00 committed by GitHub
parent 5325d2e6f4
commit 31a64927a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 18 additions and 4 deletions

View File

@ -160,6 +160,7 @@ public class Hunspell {
private Root<CharsRef> findStem(
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
checkCanceled.run();
+boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END;
@SuppressWarnings({"rawtypes", "unchecked"})
Root<CharsRef>[] result = new Root[1];
stemmer.doStem(
@ -168,7 +169,7 @@ public class Hunspell {
length,
context,
(stem, formID, morphDataId) -> {
-if (!acceptCase(originalCase, formID, stem)) {
+if (checkCase && !acceptCase(originalCase, formID, stem)) {
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
}
if (acceptsStem(formID)) {

View File

@ -4,3 +4,5 @@ KEEPCASE Z
SFX X Y 1
SFX X 0 s . +s
COMPOUNDFLAG C

View File

@ -1,4 +1,4 @@
-9
+11
drink/X
walk/XZ
test/Z
@ -8,3 +8,6 @@ baz./Z
Quux./Z
way/X
ways/Z
tvv/ZC
school/C
uni/ZC

View File

@ -5,4 +5,8 @@ Quux.
way
Way
WAY
ways
ways
schooltvv
Schooltvv
SCHOOLTVV
unitvv

View File

@ -6,4 +6,6 @@ baz.
baz.
Quux.
Quux.
Way
Way
unitvv, Uni tvv, uni
unitvv, UNI TVV, uni

View File

@ -7,3 +7,5 @@ BAZ.
quux.
QUUX.
Ways
Unitvv
UNITVV