LUCENE-9775: Hunspell: make FORCEUCASE work when the first compound word is inherently title-case (#2375)

This commit is contained in:
Peter Gromov 2021-02-17 07:54:12 +01:00 committed by GitHub
parent 2555418048
commit 902cb93db2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 3 deletions

View File

@ -94,7 +94,7 @@ class CheckCompoundPattern {
CharsRef expandReplacement(CharsRef word, int breakPos) {
if (replacement != null && charsMatch(word, breakPos, replacement)) {
return new CharsRef(
word.subSequence(0, breakPos)
new String(word.chars, 0, word.offset + breakPos)
+ endChars
+ beginChars
+ word.subSequence(breakPos + replacement.length(), word.length));

View File

@ -171,6 +171,10 @@ public class Hunspell {
private boolean checkCompounds(CharsRef word, WordCase originalCase, CompoundPart prev) {
if (prev != null && prev.index > dictionary.compoundMax - 2) return false;
if (prev == null && word.offset != 0) {
// we check the word's beginning for FORCEUCASE and expect to find it at 0
throw new IllegalArgumentException();
}
int limit = word.length - dictionary.compoundMin + 1;
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
@ -231,7 +235,7 @@ public class Hunspell {
if (lastRoot != null
&& !dictionary.hasFlag(lastRoot.entryId, dictionary.forbiddenword)
&& !(dictionary.checkCompoundDup && prev.root.equals(lastRoot))
&& !hasForceUCaseProblem(lastRoot, originalCase)
&& !hasForceUCaseProblem(lastRoot, originalCase, word.chars)
&& prev.mayCompound(lastRoot, remainingLength, originalCase)) {
return true;
}
@ -240,8 +244,9 @@ public class Hunspell {
return checkCompounds(tail, originalCase, prev);
}
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase) {
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase, char[] wordChars) {
if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false;
if (originalCase == null && Character.isUpperCase(wordChars[0])) return false;
return dictionary.hasFlag(root.entryId, dictionary.forceUCase);
}

View File

@ -2,3 +2,4 @@
foo/C
bar/C
baz/CA
Upper/C

View File

@ -1,3 +1,4 @@
Foobaz
foo
bar
baz
@ -5,3 +6,4 @@ foobar
Foobaz
foobazbar
Foobarbaz
Upperbaz