diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 661cd041568..5401062e06c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -44,7 +44,9 @@ New Features Improvements --------------------- -(No changes) + +* GITHUB#14079: Hunspell Dictionary now supports an option to tolerate REP rule count mismatches. + (Robert Muir) Optimizations --------------------- diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 525e39dc389..fc5353560b5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -405,10 +405,18 @@ public class Dictionary { } else if ("TRY".equals(firstWord)) { tryChars = firstArgument(reader, line); } else if ("REP".equals(firstWord)) { - int count = parseNum(reader, line); - for (int i = 0; i < count; i++) { - String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE); - repTable.add(new RepEntry(parts[1], parts[2])); + if (tolerateRepRuleCountMismatches()) { + String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE); + // ignore REP N, as actual N may be incorrect + if (parts.length >= 3) { + repTable.add(new RepEntry(parts[1], parts[2])); + } + } else { + int count = parseNum(reader, line); + for (int i = 0; i < count; i++) { + String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE); + repTable.add(new RepEntry(parts[1], parts[2])); + } } } else if ("MAP".equals(firstWord)) { int count = parseNum(reader, line); @@ -1168,6 +1176,14 @@ public class Dictionary { return false; } + /** + * Whether incorrect REP rule counts will be silently ignored. False by default: a {@link + * ParseException} will happen. + */ + protected boolean tolerateRepRuleCountMismatches() { + return false; + } + /** * Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link * IllegalStateException} will happen. diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java index 9b23a81b32b..cdede3092b1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java @@ -77,6 +77,11 @@ public class TestAllDictionaries extends LuceneTestCase { protected boolean tolerateAffixRuleCountMismatches() { return true; } + + @Override + protected boolean tolerateRepRuleCountMismatches() { + return true; + } }; } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index 28cfb1330ed..1aab0164121 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -207,6 +207,11 @@ public class TestDictionary extends LuceneTestCase { return true; } + @Override + protected boolean tolerateRepRuleCountMismatches() { + return true; + } + @Override protected boolean tolerateDuplicateConversionMappings() { return true; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forgivable-errors.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forgivable-errors.aff index cb093496691..0c5d22c26bb 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forgivable-errors.aff +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/forgivable-errors.aff @@ -1,4 +1,4 @@ -REP 1 +REP 0 REP foo bar goo doo zoo COMPOUNDWORDMAX 2 y @@ -16,4 +16,4 @@ SFX A b c d ICONV 2 ICONV x y -ICONV x y \ No newline at end of file +ICONV x y