diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 5fcbd4f730a..94b3c323b84 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -139,11 +139,13 @@ public class Dictionary { boolean twoStageAffix; char circumfix; - char keepcase; + char keepcase, forceUCase; char needaffix; char forbiddenword; - char onlyincompound, compoundBegin, compoundMiddle, compoundEnd, compoundPermit; - boolean checkCompoundCase; + char onlyincompound, compoundBegin, compoundMiddle, compoundEnd, compoundFlag; + char compoundPermit, compoundForbid; + boolean checkCompoundCase, checkCompoundDup; + boolean checkCompoundTriple, simplifiedTriple; int compoundMin = 3, compoundMax = Integer.MAX_VALUE; List compoundRules; // nullable @@ -350,6 +352,8 @@ public class Dictionary { circumfix = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("KEEPCASE".equals(firstWord)) { keepcase = flagParsingStrategy.parseFlag(singleArgument(reader, line)); + } else if ("FORCEUCASE".equals(firstWord)) { + forceUCase = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("NEEDAFFIX".equals(firstWord) || "PSEUDOROOT".equals(firstWord)) { needaffix = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("ONLYINCOMPOUND".equals(firstWord)) { @@ -387,6 +391,8 @@ public class Dictionary { compoundMax = Math.max(1, Integer.parseInt(singleArgument(reader, line))); } else if ("COMPOUNDRULE".equals(firstWord)) { compoundRules = parseCompoundRules(reader, Integer.parseInt(singleArgument(reader, line))); + } else if ("COMPOUNDFLAG".equals(firstWord)) { + compoundFlag = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("COMPOUNDBEGIN".equals(firstWord)) { compoundBegin = 
flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("COMPOUNDMIDDLE".equals(firstWord)) { @@ -395,8 +401,16 @@ public class Dictionary { compoundEnd = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("COMPOUNDPERMITFLAG".equals(firstWord)) { compoundPermit = flagParsingStrategy.parseFlag(singleArgument(reader, line)); + } else if ("COMPOUNDFORBIDFLAG".equals(firstWord)) { + compoundForbid = flagParsingStrategy.parseFlag(singleArgument(reader, line)); } else if ("CHECKCOMPOUNDCASE".equals(firstWord)) { checkCompoundCase = true; + } else if ("CHECKCOMPOUNDDUP".equals(firstWord)) { + checkCompoundDup = true; + } else if ("CHECKCOMPOUNDTRIPLE".equals(firstWord)) { + checkCompoundTriple = true; + } else if ("SIMPLIFIEDTRIPLE".equals(firstWord)) { + simplifiedTriple = true; } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java index b0c095ae9b5..10cc206fe65 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/SpellChecker.java @@ -16,9 +16,16 @@ */ package org.apache.lucene.analysis.hunspell; +import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET; +import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN; +import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END; +import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE; +import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD; + import java.util.ArrayList; import java.util.List; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IntsRef; /** @@ -126,7 +133,7 @@ public class SpellChecker { return false; } - if (hasStems(wordChars, 0, length, originalCase, 
WordContext.SIMPLE_WORD)) { + if (!stemmer.doStem(wordChars, 0, length, originalCase, SIMPLE_WORD).isEmpty()) { return true; } @@ -135,12 +142,11 @@ public class SpellChecker { return true; } - return dictionary.compoundBegin > 0 && checkCompounds(wordChars, 0, length, originalCase, 0); - } + if (dictionary.compoundBegin != FLAG_UNSET || dictionary.compoundFlag != FLAG_UNSET) { + return checkCompounds(wordChars, 0, length, originalCase, 0); + } - private boolean hasStems( - char[] chars, int offset, int length, WordCase originalCase, WordContext context) { - return !stemmer.doStem(chars, offset, length, originalCase, context).isEmpty(); + return false; } private boolean checkCompounds( @@ -149,12 +155,23 @@ public class SpellChecker { int limit = length - dictionary.compoundMin + 1; for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) { - WordContext context = depth == 0 ? WordContext.COMPOUND_BEGIN : WordContext.COMPOUND_MIDDLE; + WordContext context = depth == 0 ? 
COMPOUND_BEGIN : COMPOUND_MIDDLE; int breakOffset = offset + breakPos; - if (checkCompoundCase(chars, breakOffset) - && hasStems(chars, offset, breakPos, originalCase, context)) { + if (mayBreakIntoCompounds(chars, offset, length, breakOffset)) { + List stems = stemmer.doStem(chars, offset, breakPos, originalCase, context); + if (stems.isEmpty() + && dictionary.simplifiedTriple + && chars[breakOffset - 1] == chars[breakOffset]) { + stems = stemmer.doStem(chars, offset, breakPos + 1, originalCase, context); + } + if (stems.isEmpty()) continue; + int remainingLength = length - breakPos; - if (hasStems(chars, breakOffset, remainingLength, originalCase, WordContext.COMPOUND_END)) { + List lastStems = + stemmer.doStem(chars, breakOffset, remainingLength, originalCase, COMPOUND_END); + if (!lastStems.isEmpty() + && !(dictionary.checkCompoundDup && intersectIgnoreCase(stems, lastStems)) + && !hasForceUCaseProblem(chars, breakOffset, remainingLength, originalCase)) { return true; } @@ -167,9 +184,37 @@ public class SpellChecker { return false; } - private boolean checkCompoundCase(char[] chars, int breakPos) { - if (!dictionary.checkCompoundCase) return true; - return Character.isUpperCase(chars[breakPos - 1]) == Character.isUpperCase(chars[breakPos]); + private boolean hasForceUCaseProblem( + char[] chars, int offset, int length, WordCase originalCase) { + if (dictionary.forceUCase == FLAG_UNSET) return false; + if (originalCase == WordCase.TITLE || originalCase == WordCase.UPPER) return false; + + IntsRef forms = dictionary.lookupWord(chars, offset, length); + return forms != null && dictionary.hasFlag(forms, dictionary.forceUCase, scratch); + } + + private boolean intersectIgnoreCase(List stems1, List stems2) { + return stems1.stream().anyMatch(s1 -> stems2.stream().anyMatch(s2 -> equalsIgnoreCase(s1, s2))); + } + + private boolean equalsIgnoreCase(CharsRef cr1, CharsRef cr2) { + return cr1.toString().equalsIgnoreCase(cr2.toString()); + } + + private boolean 
mayBreakIntoCompounds(char[] chars, int offset, int length, int breakPos) { + if (dictionary.checkCompoundCase) { + if (Character.isUpperCase(chars[breakPos - 1]) || Character.isUpperCase(chars[breakPos])) { + return false; + } + } + if (dictionary.checkCompoundTriple && chars[breakPos - 1] == chars[breakPos]) { + //noinspection RedundantIfStatement + if (breakPos > offset + 1 && chars[breakPos - 2] == chars[breakPos - 1] + || breakPos < offset + length - 1 && chars[breakPos] == chars[breakPos + 1]) { /* breakPos is an absolute index into chars, so the last valid index is offset + length - 1 */ + return false; + } + } + return true; } private boolean checkCompoundRules( diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index 31237156dec..a185d5846a6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -238,9 +238,15 @@ final class Stemmer { if (!context.isCompound() && Dictionary.hasFlag(wordFlags, dictionary.onlyincompound)) { continue; } - if (context.isCompound() - && !Dictionary.hasFlag(wordFlags, context.requiredFlag(dictionary))) { - continue; + if (context.isCompound()) { + if (context != WordContext.COMPOUND_END + && Dictionary.hasFlag(wordFlags, dictionary.compoundForbid)) { + return new ArrayList<>(); + } + if (!Dictionary.hasFlag(wordFlags, dictionary.compoundFlag) + && !Dictionary.hasFlag(wordFlags, context.requiredFlag(dictionary))) { + continue; + } } stems.add(newStem(word, offset, length, forms, i)); } @@ -554,7 +560,10 @@ final class Stemmer { WordContext context) { int append = dictionary.affixData(affix, Dictionary.AFFIX_APPEND); - if (context.isCompound() && dictionary.compoundPermit > 0) { + if (context.isCompound()) { + if (!isPrefix && dictionary.hasFlag(append, dictionary.compoundForbid, scratch)) { + return false; + } WordContext allowed = isPrefix ? 
WordContext.COMPOUND_BEGIN : WordContext.COMPOUND_END; if (context != allowed && !dictionary.hasFlag(append, dictionary.compoundPermit, scratch)) { return false; @@ -672,7 +681,10 @@ final class Stemmer { } if (context.isCompound()) { char cFlag = context.requiredFlag(dictionary); - if (!Dictionary.hasFlag(wordFlags, cFlag) && !isFlagAppendedByAffix(affix, cFlag)) { + if (!Dictionary.hasFlag(wordFlags, cFlag) + && !isFlagAppendedByAffix(affix, cFlag) + && !Dictionary.hasFlag(wordFlags, dictionary.compoundFlag) + && !isFlagAppendedByAffix(affix, dictionary.compoundFlag)) { continue; } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java index 5bcfe05759c..f870f380110 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java @@ -46,6 +46,11 @@ public class SpellCheckerTest extends StemmerTestBase { doTest("allcaps"); } + @Test + public void forceUCase() throws Exception { + doTest("forceucase"); + } + @Test public void checkSharpS() throws Exception { doTest("checksharps"); @@ -71,6 +76,36 @@ public class SpellCheckerTest extends StemmerTestBase { doTest("needaffix5"); } + @Test + public void compoundFlag() throws Exception { + doTest("compoundflag"); + } + + @Test + public void checkCompoundCase() throws Exception { + doTest("checkcompoundcase"); + } + + @Test + public void checkCompoundDup() throws Exception { + doTest("checkcompounddup"); + } + + @Test + public void checkCompoundTriple() throws Exception { + doTest("checkcompoundtriple"); + } + + @Test + public void simplifiedTriple() throws Exception { + doTest("simplifiedtriple"); + } + + @Test + public void compoundForbid() throws Exception { + doTest("compoundforbid"); + } + public void testBreak() throws Exception { 
doTest("break"); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff new file mode 100644 index 00000000000..7ac46eeab7c --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.aff @@ -0,0 +1,3 @@ +# forbid upper case letters at word bounds in compounding +CHECKCOMPOUNDCASE +COMPOUNDFLAG A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic new file mode 100644 index 00000000000..80f65d38f60 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.dic @@ -0,0 +1,5 @@ +4 +foo/A +Bar/A +BAZ/A +-/A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good new file mode 100644 index 00000000000..9cbd79064dd --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.good @@ -0,0 +1,5 @@ +Barfoo +foo-Bar +foo-BAZ +BAZ-foo +BAZ-Bar diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.wrong new file mode 100644 index 00000000000..0714c22e5d3 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundcase.wrong @@ -0,0 +1,3 @@ +fooBar +BAZBar +BAZfoo diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.aff new file mode 100644 index 00000000000..5cd357a5a58 --- /dev/null +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.aff @@ -0,0 +1,3 @@ +# Forbid word duplication in compounds (e.g. foofoo) +CHECKCOMPOUNDDUP +COMPOUNDFLAG A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.dic new file mode 100644 index 00000000000..8ac75f4fc5d --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.dic @@ -0,0 +1,3 @@ +2 +foo/A +bar/A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.good new file mode 100644 index 00000000000..3866f24cae2 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.good @@ -0,0 +1,5 @@ +barfoo +foobar +foofoobar +foobarfoo +barfoobarfoo diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.wrong new file mode 100644 index 00000000000..5e809b3d8c8 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompounddup.wrong @@ -0,0 +1,3 @@ +foofoo +foofoofoo +foobarbar diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.aff new file mode 100644 index 00000000000..7159cf55dd4 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.aff @@ -0,0 +1,3 @@ +# Forbid compound word with triple letters +CHECKCOMPOUNDTRIPLE +COMPOUNDFLAG A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.dic 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.dic new file mode 100644 index 00000000000..607c489e8b7 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.dic @@ -0,0 +1,5 @@ +4 +foo/A +opera/A +eel/A +bare/A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.good new file mode 100644 index 00000000000..1293f749ad0 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.good @@ -0,0 +1,6 @@ +operafoo +operaeel +operabare +eelbare +eelfoo +eelopera diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.wrong new file mode 100644 index 00000000000..ae2d02b20d8 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/checkcompoundtriple.wrong @@ -0,0 +1,2 @@ +fooopera +bareeel diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.aff new file mode 100644 index 00000000000..bc8369ceba8 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.aff @@ -0,0 +1,3 @@ +COMPOUNDMIN 3 +COMPOUNDFLAG A + diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.dic new file mode 100644 index 00000000000..d1ea8e96e7f --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.dic @@ -0,0 +1,5 @@ +4 +foo/A +bar/A +xy/A +yz/A diff --git 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.good new file mode 100644 index 00000000000..21cc29f2f04 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.good @@ -0,0 +1,3 @@ +foobar +barfoo +foobarfoo diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.wrong new file mode 100644 index 00000000000..c185bf150ef --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundflag.wrong @@ -0,0 +1,4 @@ +xyyz +fooxy +xyfoo +fooxybar diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.aff new file mode 100644 index 00000000000..8703ef4cf01 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.aff @@ -0,0 +1,15 @@ +# Dictionary words with COMPOUNDFORBIDFLAG are +# removed from the beginning and middle of +# compound words, overriding the effect of +# COMPOUNDPERMITFLAG. +# +# See compoundaffix3 test for basic usage +# of COMPOUNDFORBIDFLAG. + +COMPOUNDFLAG X +COMPOUNDPERMITFLAG Y +COMPOUNDFORBIDFLAG Z + +SFX S Y 2 +SFX S 0 bar/YX . +SFX S 0 baz/YX . 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.dic new file mode 100644 index 00000000000..fd408a34908 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.dic @@ -0,0 +1,4 @@ +3 +foo/S +example/X +foobaz/Z diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.good new file mode 100644 index 00000000000..b06ff081f87 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.good @@ -0,0 +1,5 @@ +foo +example +foobar +foobaz +foobarexample diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.wrong new file mode 100644 index 00000000000..463e1d70873 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compoundforbid.wrong @@ -0,0 +1,3 @@ +fooexample +examplefoo +foobazexample diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.aff new file mode 100644 index 00000000000..5eebcbdab74 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.aff @@ -0,0 +1,4 @@ +# force capitalized compound +TRY F +FORCEUCASE A +COMPOUNDFLAG C diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.dic new file mode 100644 index 00000000000..82fd93b3097 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.dic @@ -0,0 +1,4 @@ +3 +foo/C 
+bar/C +baz/CA diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.good new file mode 100644 index 00000000000..37ecf495731 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.good @@ -0,0 +1,7 @@ +foo +bar +baz +foobar +Foobaz +foobazbar +Foobarbaz diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.wrong new file mode 100644 index 00000000000..1503e42ddcf --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forceucase.wrong @@ -0,0 +1,2 @@ +foobaz +foobarbaz diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.aff new file mode 100644 index 00000000000..3ab347319ae --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.aff @@ -0,0 +1,8 @@ +# Forbid compound word with triple letters +CHECKCOMPOUNDTRIPLE +# Allow simplified forms +SIMPLIFIEDTRIPLE + +COMPOUNDMIN 2 + +COMPOUNDFLAG A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.dic new file mode 100644 index 00000000000..cfe7a35dcea --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.dic @@ -0,0 +1,3 @@ +2 +glass/A +sko/A diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.good b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.good new file mode 100644 index 00000000000..23a4815e8b6 --- /dev/null +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.good @@ -0,0 +1,3 @@ +glass +sko +glassko diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.wrong b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.wrong new file mode 100644 index 00000000000..2811287685e --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/simplifiedtriple.wrong @@ -0,0 +1 @@ +glasssko