mirror of https://github.com/apache/lucene.git
hunspell: disallow hidden title-case entries from compound middle/end (#12220)
If the dictionary only has the custom-case entry "uART" and the all-uppercase entry "UART", we shouldn't accept "StandUart" as a compound (although we still keep hidden title-case "Uart" dictionary entries for internal purposes).
This commit is contained in:
parent
56e65919b1
commit
56aef7265a
|
@ -164,7 +164,7 @@ public class Hunspell {
|
|||
Root<CharsRef> findStem(
|
||||
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
|
||||
checkCanceled.run();
|
||||
boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END;
|
||||
WordCase toCheck = context != COMPOUND_MIDDLE && context != COMPOUND_END ? originalCase : null;
|
||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||
Root<CharsRef>[] result = new Root[1];
|
||||
stemmer.doStem(
|
||||
|
@ -173,7 +173,7 @@ public class Hunspell {
|
|||
length,
|
||||
context,
|
||||
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
|
||||
if (checkCase && !acceptCase(originalCase, formID, stem)) {
|
||||
if (!acceptCase(toCheck, formID, stem)) {
|
||||
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
|
||||
}
|
||||
if (acceptsStem(formID)) {
|
||||
|
|
|
@ -205,6 +205,10 @@ public class TestSpellChecking extends LuceneTestCase {
|
|||
doTest("germancompounding");
|
||||
}
|
||||
|
||||
public void testGermanManualCase() throws Exception {
|
||||
doTest("germanManualCase");
|
||||
}
|
||||
|
||||
public void testApplyOconvToSuggestions() throws Exception {
|
||||
doTest("oconv");
|
||||
}
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
# no CHECKCOMPOUNDCASE
|
||||
|
||||
# compound flags
|
||||
|
||||
COMPOUNDBEGIN U
|
||||
COMPOUNDMIDDLE V
|
||||
COMPOUNDEND W
|
||||
|
||||
ONLYINCOMPOUND X
|
||||
COMPOUNDPERMITFLAG P
|
||||
|
||||
COMPOUNDMIN 1
|
||||
WORDCHARS -
|
||||
|
||||
# dash prefix for compounds with dash (Arbeits-Computer)
|
||||
|
||||
PFX - Y 1
|
||||
PFX - 0 -/P .
|
||||
|
||||
# decapitalizing prefix
|
||||
|
||||
PFX D Y 29
|
||||
PFX D A a/PX A
|
||||
PFX D Ä ä/PX Ä
|
||||
PFX D B b/PX B
|
||||
PFX D C c/PX C
|
||||
PFX D D d/PX D
|
||||
PFX D E e/PX E
|
||||
PFX D F f/PX F
|
||||
PFX D G g/PX G
|
||||
PFX D H h/PX H
|
||||
PFX D I i/PX I
|
||||
PFX D J j/PX J
|
||||
PFX D K k/PX K
|
||||
PFX D L l/PX L
|
||||
PFX D M m/PX M
|
||||
PFX D N n/PX N
|
||||
PFX D O o/PX O
|
||||
PFX D Ö ö/PX Ö
|
||||
PFX D P p/PX P
|
||||
PFX D Q q/PX Q
|
||||
PFX D R r/PX R
|
||||
PFX D S s/PX S
|
||||
PFX D T t/PX T
|
||||
PFX D U u/PX U
|
||||
PFX D Ü ü/PX Ü
|
||||
PFX D V v/PX V
|
||||
PFX D W w/PX W
|
||||
PFX D X x/PX X
|
||||
PFX D Y y/PX Y
|
||||
PFX D Z z/PX Z
|
|
@ -0,0 +1,5 @@
|
|||
4
|
||||
uART/XW-
|
||||
bein/XW-
|
||||
Stand/UX
|
||||
UART/-
|
|
@ -0,0 +1,3 @@
|
|||
UART
|
||||
Standbein
|
||||
Stand-uART
|
|
@ -0,0 +1,3 @@
|
|||
StandUart
|
||||
uART
|
||||
Uart
|
Loading…
Reference in New Issue