mirror of https://github.com/apache/lucene.git
hunspell: tolerate REP rule count mismatches (#14079)
Similar to the existing support for tolerating PFX/SFX count mismatches, add the ability to tolerate REP count mismatches. The issue arises in recent updates to the LibreOffice Mongolian dictionary and is currently failing all PRs that change the analyzers: https://bugs.documentfoundation.org/show_bug.cgi?id=164366
This commit is contained in:
parent
c9b4bcdced
commit
0088308d7c
|
@ -44,7 +44,9 @@ New Features
|
|||
|
||||
Improvements
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* GITHUB#14079: Hunspell Dictionary now supports an option to tolerate REP rule count mismatches.
|
||||
(Robert Muir)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
|
|
@ -405,10 +405,18 @@ public class Dictionary {
|
|||
} else if ("TRY".equals(firstWord)) {
|
||||
tryChars = firstArgument(reader, line);
|
||||
} else if ("REP".equals(firstWord)) {
|
||||
int count = parseNum(reader, line);
|
||||
for (int i = 0; i < count; i++) {
|
||||
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
|
||||
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||
if (tolerateRepRuleCountMismatches()) {
|
||||
String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE);
|
||||
// ignore REP N, as actual N may be incorrect
|
||||
if (parts.length >= 3) {
|
||||
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||
}
|
||||
} else {
|
||||
int count = parseNum(reader, line);
|
||||
for (int i = 0; i < count; i++) {
|
||||
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
|
||||
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||
}
|
||||
}
|
||||
} else if ("MAP".equals(firstWord)) {
|
||||
int count = parseNum(reader, line);
|
||||
|
@ -1168,6 +1176,14 @@ public class Dictionary {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether incorrect REP rule counts will be silently ignored. False by default: a {@link
|
||||
* ParseException} will happen.
|
||||
*/
|
||||
protected boolean tolerateRepRuleCountMismatches() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link
|
||||
* IllegalStateException} will happen.
|
||||
|
|
|
@ -77,6 +77,11 @@ public class TestAllDictionaries extends LuceneTestCase {
|
|||
protected boolean tolerateAffixRuleCountMismatches() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean tolerateRepRuleCountMismatches() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -207,6 +207,11 @@ public class TestDictionary extends LuceneTestCase {
|
|||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean tolerateRepRuleCountMismatches() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean tolerateDuplicateConversionMappings() {
|
||||
return true;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
REP 1
|
||||
REP 0
|
||||
REP foo bar goo doo zoo
|
||||
|
||||
COMPOUNDWORDMAX 2 y
|
||||
|
@ -16,4 +16,4 @@ SFX A b c d
|
|||
|
||||
ICONV 2
|
||||
ICONV x y
|
||||
ICONV x y
|
||||
ICONV x y
|
||||
|
|
Loading…
Reference in New Issue