From 1ff11dd02c59aa51786814ad1822655cd66e3e3c Mon Sep 17 00:00:00 2001 From: Peter Gromov Date: Mon, 15 Feb 2021 20:20:58 +0100 Subject: [PATCH] LUCENE-9772: Hunspell: CHECKCOMPOUNDCASE shouldn't prohibit dash-separated uppercase compounds (#2370) --- .../java/org/apache/lucene/analysis/hunspell/Hunspell.java | 4 +++- .../org/apache/lucene/analysis/hunspell/checkcompoundcase.aff | 1 + .../org/apache/lucene/analysis/hunspell/checkcompoundcase.dic | 3 ++- .../apache/lucene/analysis/hunspell/checkcompoundcase.good | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java index 3286f86c1b0..99884218d25 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java @@ -336,7 +336,9 @@ public class Hunspell { private boolean mayBreakIntoCompounds(char[] chars, int offset, int length, int breakPos) { if (dictionary.checkCompoundCase) { - if (Character.isUpperCase(chars[breakPos - 1]) || Character.isUpperCase(chars[breakPos])) { + char a = chars[breakPos - 1]; + char b = chars[breakPos]; + if ((Character.isUpperCase(a) || Character.isUpperCase(b)) && a != '-' && b != '-') { return false; } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff index 7ac46eeab7c..006739d211f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff @@ -1,3 +1,4 @@ # forbid upper case letters at word bounds in compounding CHECKCOMPOUNDCASE +WORDCHARS - COMPOUNDFLAG A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic index 80f65d38f60..6bdbbbab945 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic @@ -1,5 +1,6 @@ -4 +5 foo/A Bar/A BAZ/A -/A +prefix-/A \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good index 9cbd79064dd..a2dfe5bbdf5 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good @@ -3,3 +3,4 @@ foo-Bar foo-BAZ BAZ-foo BAZ-Bar +prefix-BAZ \ No newline at end of file