mirror of https://github.com/apache/lucene.git
hunspell: tolerate REP rule count mismatches (#14079)
Similar to the existing support for tolerating PFX/SFX count mismatches, add the ability to tolerate REP count mismatches. The issue arises in recent updates to the LibreOffice Mongolian dictionary and is currently failing all PRs that change the analyzers: https://bugs.documentfoundation.org/show_bug.cgi?id=164366
This commit is contained in:
parent
c9b4bcdced
commit
0088308d7c
|
@ -44,7 +44,9 @@ New Features
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
|
||||||
|
* GITHUB#14079: Hunspell Dictionary now supports an option to tolerate REP rule count mismatches.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
|
@ -405,10 +405,18 @@ public class Dictionary {
|
||||||
} else if ("TRY".equals(firstWord)) {
|
} else if ("TRY".equals(firstWord)) {
|
||||||
tryChars = firstArgument(reader, line);
|
tryChars = firstArgument(reader, line);
|
||||||
} else if ("REP".equals(firstWord)) {
|
} else if ("REP".equals(firstWord)) {
|
||||||
int count = parseNum(reader, line);
|
if (tolerateRepRuleCountMismatches()) {
|
||||||
for (int i = 0; i < count; i++) {
|
String[] parts = splitBySpace(reader, line, 2, Integer.MAX_VALUE);
|
||||||
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
|
// ignore REP N, as actual N may be incorrect
|
||||||
repTable.add(new RepEntry(parts[1], parts[2]));
|
if (parts.length >= 3) {
|
||||||
|
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int count = parseNum(reader, line);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
|
||||||
|
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if ("MAP".equals(firstWord)) {
|
} else if ("MAP".equals(firstWord)) {
|
||||||
int count = parseNum(reader, line);
|
int count = parseNum(reader, line);
|
||||||
|
@ -1168,6 +1176,14 @@ public class Dictionary {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether incorrect REP rule counts will be silently ignored. False by default: a {@link
|
||||||
|
* ParseException} will happen.
|
||||||
|
*/
|
||||||
|
protected boolean tolerateRepRuleCountMismatches() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link
|
* Whether duplicate ICONV/OCONV lines should be silently ignored. False by default: an {@link
|
||||||
* IllegalStateException} will happen.
|
* IllegalStateException} will happen.
|
||||||
|
|
|
@ -77,6 +77,11 @@ public class TestAllDictionaries extends LuceneTestCase {
|
||||||
protected boolean tolerateAffixRuleCountMismatches() {
|
protected boolean tolerateAffixRuleCountMismatches() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean tolerateRepRuleCountMismatches() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -207,6 +207,11 @@ public class TestDictionary extends LuceneTestCase {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean tolerateRepRuleCountMismatches() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean tolerateDuplicateConversionMappings() {
|
protected boolean tolerateDuplicateConversionMappings() {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
REP 1
|
REP 0
|
||||||
REP foo bar goo doo zoo
|
REP foo bar goo doo zoo
|
||||||
|
|
||||||
COMPOUNDWORDMAX 2 y
|
COMPOUNDWORDMAX 2 y
|
||||||
|
@ -16,4 +16,4 @@ SFX A b c d
|
||||||
|
|
||||||
ICONV 2
|
ICONV 2
|
||||||
ICONV x y
|
ICONV x y
|
||||||
ICONV x y
|
ICONV x y
|
||||||
|
|
Loading…
Reference in New Issue