hunspell: tolerate REP rule count mismatches (#14079)

Similar to support for tolerating PFX/SFX count mismatches, add the
ability to tolerate REP count mismatches.

The issue arises from recent updates to the LibreOffice Mongolian dictionary
and is currently failing all PRs that change the analyzers:

https://bugs.documentfoundation.org/show_bug.cgi?id=164366
This commit is contained in:
Robert Muir 2024-12-18 09:39:49 -05:00 committed by GitHub
parent c9b4bcdced
commit 0088308d7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 35 additions and 7 deletions

View File

@ -44,7 +44,9 @@ New Features
Improvements
---------------------
(No changes)
* GITHUB#14079: Hunspell Dictionary now supports an option to tolerate REP rule count mismatches.
(Robert Muir)
Optimizations
---------------------

View File

@ -405,10 +405,18 @@ public class Dictionary {
} else if ("TRY".equals(firstWord)) {
tryChars = firstArgument(reader, line);
} else if ("REP".equals(firstWord)) {
int count = parseNum(reader, line);
for (int i = 0; i < count; i++) {
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
repTable.add(new RepEntry(parts[1], parts[2]));
if (tolerateRepRuleCountMismatches()) {
String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE);
// ignore REP N, as actual N may be incorrect
if (parts.length >= 3) {
repTable.add(new RepEntry(parts[1], parts[2]));
}
} else {
int count = parseNum(reader, line);
for (int i = 0; i < count; i++) {
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
repTable.add(new RepEntry(parts[1], parts[2]));
}
}
} else if ("MAP".equals(firstWord)) {
int count = parseNum(reader, line);
@ -1168,6 +1176,14 @@ public class Dictionary {
return false;
}
/**
* Whether incorrect REP rule counts will be silently ignored. False by default: a {@link
* ParseException} will happen.
*
* <p>When this returns true, the count declared on the {@code REP N} header line is not used:
* every line starting with {@code REP} is handled on its own, and only lines that carry at
* least two arguments after the keyword are added to the replacement table. Subclasses
* override this method to opt in.
*/
protected boolean tolerateRepRuleCountMismatches() {
return false;
}
/**
* Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link
* IllegalStateException} will happen.

View File

@ -77,6 +77,11 @@ public class TestAllDictionaries extends LuceneTestCase {
protected boolean tolerateAffixRuleCountMismatches() {
return true;
}
@Override
protected boolean tolerateRepRuleCountMismatches() {
// Third-party dictionaries may declare a REP count that disagrees with the rules that
// follow, as reported for the LibreOffice Mongolian dictionary:
// https://bugs.documentfoundation.org/show_bug.cgi?id=164366
return true;
}
};
}
}

View File

@ -207,6 +207,11 @@ public class TestDictionary extends LuceneTestCase {
return true;
}
@Override
protected boolean tolerateRepRuleCountMismatches() {
// Opt in to lenient REP parsing so the declared REP count is not enforced.
// NOTE(review): presumably paired with a test .aff resource whose REP count disagrees
// with the rules that follow it -- confirm against the resource file.
return true;
}
@Override
protected boolean tolerateDuplicateConversionMappings() {
return true;

View File

@ -1,4 +1,4 @@
REP 1
REP 0
REP foo bar goo doo zoo
COMPOUNDWORDMAX 2 y
@ -16,4 +16,4 @@ SFX A b c d
ICONV 2
ICONV x y
ICONV x y
ICONV x y