mirror of https://github.com/apache/lucene.git
LUCENE-9775: Hunspell: make FORCEUCASE work when the first compound word is inherently title-case (#2375)
This commit is contained in:
parent
2555418048
commit
902cb93db2
|
@ -94,7 +94,7 @@ class CheckCompoundPattern {
|
|||
CharsRef expandReplacement(CharsRef word, int breakPos) {
|
||||
if (replacement != null && charsMatch(word, breakPos, replacement)) {
|
||||
return new CharsRef(
|
||||
word.subSequence(0, breakPos)
|
||||
new String(word.chars, 0, word.offset + breakPos)
|
||||
+ endChars
|
||||
+ beginChars
|
||||
+ word.subSequence(breakPos + replacement.length(), word.length));
|
||||
|
|
|
@ -171,6 +171,10 @@ public class Hunspell {
|
|||
|
||||
private boolean checkCompounds(CharsRef word, WordCase originalCase, CompoundPart prev) {
|
||||
if (prev != null && prev.index > dictionary.compoundMax - 2) return false;
|
||||
if (prev == null && word.offset != 0) {
|
||||
// FORCEUCASE is checked against the first char of word.chars, so with no previous part the word must start at offset 0
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
int limit = word.length - dictionary.compoundMin + 1;
|
||||
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
|
||||
|
@ -231,7 +235,7 @@ public class Hunspell {
|
|||
if (lastRoot != null
|
||||
&& !dictionary.hasFlag(lastRoot.entryId, dictionary.forbiddenword)
|
||||
&& !(dictionary.checkCompoundDup && prev.root.equals(lastRoot))
|
||||
&& !hasForceUCaseProblem(lastRoot, originalCase)
|
||||
&& !hasForceUCaseProblem(lastRoot, originalCase, word.chars)
|
||||
&& prev.mayCompound(lastRoot, remainingLength, originalCase)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -240,8 +244,9 @@ public class Hunspell {
|
|||
return checkCompounds(tail, originalCase, prev);
|
||||
}
|
||||
|
||||
/**
 * Reports whether {@code root} carries the dictionary's {@code forceUCase} flag in a situation
 * where neither of the two case guards below applies.
 *
 * <p>NOTE(review): two method headers appear in this span. The body reads {@code wordChars}, so
 * only the three-parameter header is consistent with it; the two-parameter header looks like the
 * pre-change line of the diff -- confirm before applying.
 *
 * @param root the root whose {@code entryId} is tested for the {@code forceUCase} flag
 * @param originalCase case of the word; {@code TITLE} and {@code UPPER} short-circuit to
 *     {@code false}, and {@code null} triggers the first-character check
 * @param wordChars characters whose first element is inspected when {@code originalCase} is
 *     {@code null}; indexed at 0 without a length check
 * @return {@code false} if a case guard matches, otherwise whether the entry has the
 *     {@code forceUCase} flag
 */
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase) {
|
||||
private boolean hasForceUCaseProblem(Root<?> root, WordCase originalCase, char[] wordChars) {
|
||||
// A word recorded as TITLE or UPPER is never reported as a problem.
if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false;
|
||||
// With no recorded case, fall back to inspecting the very first character of the backing array.
if (originalCase == null && Character.isUpperCase(wordChars[0])) return false;
|
||||
// Otherwise the answer is simply whether this entry is flagged forceUCase.
return dictionary.hasFlag(root.entryId, dictionary.forceUCase);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,3 +2,4 @@
|
|||
foo/C
|
||||
bar/C
|
||||
baz/CA
|
||||
Upper/C
|
|
@ -1,3 +1,4 @@
|
|||
Foobaz
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
|
@ -5,3 +6,4 @@ foobar
|
|||
Foobaz
|
||||
foobazbar
|
||||
Foobarbaz
|
||||
Upperbaz
|
||||
|
|
Loading…
Reference in New Issue