hunspell: tolerate REP rule count mismatches (#14079)

Similar to support for tolerating PFX/SFX count mismatches, add the
ability to tolerate REP count mismatches.

The issue arises from recent updates to the LibreOffice Mongolian dictionary
and is currently failing all PRs that change the analyzers:

https://bugs.documentfoundation.org/show_bug.cgi?id=164366
This commit is contained in:
Robert Muir 2024-12-18 09:39:49 -05:00 committed by GitHub
parent c9b4bcdced
commit 0088308d7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 35 additions and 7 deletions

View File

@ -44,7 +44,9 @@ New Features
Improvements Improvements
--------------------- ---------------------
(No changes)
* GITHUB#14079: Hunspell Dictionary now supports an option to tolerate REP rule count mismatches.
(Robert Muir)
Optimizations Optimizations
--------------------- ---------------------

View File

@ -405,10 +405,18 @@ public class Dictionary {
} else if ("TRY".equals(firstWord)) { } else if ("TRY".equals(firstWord)) {
tryChars = firstArgument(reader, line); tryChars = firstArgument(reader, line);
} else if ("REP".equals(firstWord)) { } else if ("REP".equals(firstWord)) {
int count = parseNum(reader, line); if (tolerateRepRuleCountMismatches()) {
for (int i = 0; i < count; i++) { String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE);
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE); // ignore REP N, as actual N may be incorrect
repTable.add(new RepEntry(parts[1], parts[2])); if (parts.length >= 3) {
repTable.add(new RepEntry(parts[1], parts[2]));
}
} else {
int count = parseNum(reader, line);
for (int i = 0; i < count; i++) {
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
repTable.add(new RepEntry(parts[1], parts[2]));
}
} }
} else if ("MAP".equals(firstWord)) { } else if ("MAP".equals(firstWord)) {
int count = parseNum(reader, line); int count = parseNum(reader, line);
@ -1168,6 +1176,14 @@ public class Dictionary {
return false; return false;
} }
/**
 * Whether incorrect REP rule counts will be silently ignored. False by default: a {@link
 * ParseException} will happen.
 *
 * <p>When this returns {@code true}, the number given on the {@code REP N} header line is not
 * used to decide how many rules to read. Instead, each {@code REP} line is handled on its own:
 * a line with at least two arguments is added to the replacement table, and a line with fewer
 * arguments (such as the {@code REP N} header itself) is skipped.
 *
 * @return {@code true} to tolerate mismatched REP counts, {@code false} to parse strictly
 */
protected boolean tolerateRepRuleCountMismatches() {
return false;
}
/** /**
* Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link * Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link
* IllegalStateException} will happen. * IllegalStateException} will happen.

View File

@ -77,6 +77,11 @@ public class TestAllDictionaries extends LuceneTestCase {
protected boolean tolerateAffixRuleCountMismatches() { protected boolean tolerateAffixRuleCountMismatches() {
return true; return true;
} }
// Accept dictionaries whose "REP N" header does not match the number of REP lines.
// NOTE(review): presumably needed for the updated LibreOffice Mongolian dictionary
// mentioned in the commit message; confirm against the failing dictionary.
@Override
protected boolean tolerateRepRuleCountMismatches() {
return true;
}
}; };
} }
} }

View File

@ -207,6 +207,11 @@ public class TestDictionary extends LuceneTestCase {
return true; return true;
} }
// Enable the lenient REP parsing mode, which ignores the count on the "REP N" header.
// NOTE(review): presumably paired with the affix fixture in this commit whose header was
// changed to "REP 0" while still listing a REP rule; confirm which test loads it.
@Override
protected boolean tolerateRepRuleCountMismatches() {
return true;
}
@Override @Override
protected boolean tolerateDuplicateConversionMappings() { protected boolean tolerateDuplicateConversionMappings() {
return true; return true;

View File

@ -1,4 +1,4 @@
REP 1 REP 0
REP foo bar goo doo zoo REP foo bar goo doo zoo
COMPOUNDWORDMAX 2 y COMPOUNDWORDMAX 2 y
@ -16,4 +16,4 @@ SFX A b c d
ICONV 2 ICONV 2
ICONV x y ICONV x y
ICONV x y ICONV x y