mirror of https://github.com/apache/lucene.git
hunspell: disallow hidden title-case entries from compound middle/end (#12220)
If we only have custom-case uART and capitalized UART, we shouldn't accept StandUart as a compound (although we keep hidden "Uart" dictionary entries for internal purposes).
This commit is contained in:
parent
56e65919b1
commit
56aef7265a
|
@ -164,7 +164,7 @@ public class Hunspell {
|
||||||
Root<CharsRef> findStem(
|
Root<CharsRef> findStem(
|
||||||
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
|
char[] wordChars, int offset, int length, WordCase originalCase, WordContext context) {
|
||||||
checkCanceled.run();
|
checkCanceled.run();
|
||||||
boolean checkCase = context != COMPOUND_MIDDLE && context != COMPOUND_END;
|
WordCase toCheck = context != COMPOUND_MIDDLE && context != COMPOUND_END ? originalCase : null;
|
||||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
Root<CharsRef>[] result = new Root[1];
|
Root<CharsRef>[] result = new Root[1];
|
||||||
stemmer.doStem(
|
stemmer.doStem(
|
||||||
|
@ -173,7 +173,7 @@ public class Hunspell {
|
||||||
length,
|
length,
|
||||||
context,
|
context,
|
||||||
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
|
(stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> {
|
||||||
if (checkCase && !acceptCase(originalCase, formID, stem)) {
|
if (!acceptCase(toCheck, formID, stem)) {
|
||||||
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
|
return dictionary.hasFlag(formID, Dictionary.HIDDEN_FLAG);
|
||||||
}
|
}
|
||||||
if (acceptsStem(formID)) {
|
if (acceptsStem(formID)) {
|
||||||
|
|
|
@ -205,6 +205,10 @@ public class TestSpellChecking extends LuceneTestCase {
|
||||||
doTest("germancompounding");
|
doTest("germancompounding");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGermanManualCase() throws Exception {
|
||||||
|
doTest("germanManualCase");
|
||||||
|
}
|
||||||
|
|
||||||
public void testApplyOconvToSuggestions() throws Exception {
|
public void testApplyOconvToSuggestions() throws Exception {
|
||||||
doTest("oconv");
|
doTest("oconv");
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
# no CHECKCOMPOUNDCASE
|
||||||
|
|
||||||
|
# compound flags
|
||||||
|
|
||||||
|
COMPOUNDBEGIN U
|
||||||
|
COMPOUNDMIDDLE V
|
||||||
|
COMPOUNDEND W
|
||||||
|
|
||||||
|
ONLYINCOMPOUND X
|
||||||
|
COMPOUNDPERMITFLAG P
|
||||||
|
|
||||||
|
COMPOUNDMIN 1
|
||||||
|
WORDCHARS -
|
||||||
|
|
||||||
|
# dash prefix for compounds with dash (Arbeits-Computer)
|
||||||
|
|
||||||
|
PFX - Y 1
|
||||||
|
PFX - 0 -/P .
|
||||||
|
|
||||||
|
# decapitalizing prefix
|
||||||
|
|
||||||
|
PFX D Y 29
|
||||||
|
PFX D A a/PX A
|
||||||
|
PFX D Ä ä/PX Ä
|
||||||
|
PFX D B b/PX B
|
||||||
|
PFX D C c/PX C
|
||||||
|
PFX D D d/PX D
|
||||||
|
PFX D E e/PX E
|
||||||
|
PFX D F f/PX F
|
||||||
|
PFX D G g/PX G
|
||||||
|
PFX D H h/PX H
|
||||||
|
PFX D I i/PX I
|
||||||
|
PFX D J j/PX J
|
||||||
|
PFX D K k/PX K
|
||||||
|
PFX D L l/PX L
|
||||||
|
PFX D M m/PX M
|
||||||
|
PFX D N n/PX N
|
||||||
|
PFX D O o/PX O
|
||||||
|
PFX D Ö ö/PX Ö
|
||||||
|
PFX D P p/PX P
|
||||||
|
PFX D Q q/PX Q
|
||||||
|
PFX D R r/PX R
|
||||||
|
PFX D S s/PX S
|
||||||
|
PFX D T t/PX T
|
||||||
|
PFX D U u/PX U
|
||||||
|
PFX D Ü ü/PX Ü
|
||||||
|
PFX D V v/PX V
|
||||||
|
PFX D W w/PX W
|
||||||
|
PFX D X x/PX X
|
||||||
|
PFX D Y y/PX Y
|
||||||
|
PFX D Z z/PX Z
|
|
@ -0,0 +1,5 @@
|
||||||
|
4
|
||||||
|
uART/XW-
|
||||||
|
bein/XW-
|
||||||
|
Stand/UX
|
||||||
|
UART/-
|
|
@ -0,0 +1,3 @@
|
||||||
|
UART
|
||||||
|
Standbein
|
||||||
|
Stand-uART
|
|
@ -0,0 +1,3 @@
|
||||||
|
StandUart
|
||||||
|
uART
|
||||||
|
Uart
|
Loading…
Reference in New Issue