LUCENE-9772: Hunspell: CHECKCOMPOUNDCASE shouldn't prohibit dash-separated uppercase compounds (#2370)

This commit is contained in:
Peter Gromov 2021-02-15 20:20:58 +01:00 committed by GitHub
parent b20e277569
commit 1ff11dd02c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 7 additions and 2 deletions

View File

@@ -336,7 +336,9 @@ public class Hunspell {
private boolean mayBreakIntoCompounds(char[] chars, int offset, int length, int breakPos) {
if (dictionary.checkCompoundCase) {
-if (Character.isUpperCase(chars[breakPos - 1]) || Character.isUpperCase(chars[breakPos])) {
+char a = chars[breakPos - 1];
+char b = chars[breakPos];
+if ((Character.isUpperCase(a) || Character.isUpperCase(b)) && a != '-' && b != '-') {
return false;
}
}

View File

@@ -1,3 +1,4 @@
# forbid upper case letters at word bounds in compounding
CHECKCOMPOUNDCASE
WORDCHARS -
COMPOUNDFLAG A

View File

@@ -1,5 +1,6 @@
-4
+5
foo/A
Bar/A
BAZ/A
-/A
prefix-/A

View File

@@ -3,3 +3,4 @@ foo-Bar
foo-BAZ
BAZ-foo
BAZ-Bar
prefix-BAZ