mirror of https://github.com/apache/lucene.git
LUCENE-9724: Hunspell: tolerate existing aff/dic file typos (#2307)
This commit is contained in:
parent
1852d7ad5a
commit
1cc26b6bb4
|
@ -376,7 +376,7 @@ public class Dictionary {
|
||||||
Arrays.sort(ignore);
|
Arrays.sort(ignore);
|
||||||
needsInputCleaning = true;
|
needsInputCleaning = true;
|
||||||
} else if ("ICONV".equals(firstWord) || "OCONV".equals(firstWord)) {
|
} else if ("ICONV".equals(firstWord) || "OCONV".equals(firstWord)) {
|
||||||
int num = Integer.parseInt(singleArgument(reader, line));
|
int num = parseNum(reader, line);
|
||||||
FST<CharsRef> res = parseConversions(reader, num);
|
FST<CharsRef> res = parseConversions(reader, num);
|
||||||
if (line.startsWith("I")) {
|
if (line.startsWith("I")) {
|
||||||
iconv = res;
|
iconv = res;
|
||||||
|
@ -397,9 +397,9 @@ public class Dictionary {
|
||||||
} else if ("TRY".equals(firstWord)) {
|
} else if ("TRY".equals(firstWord)) {
|
||||||
tryChars = singleArgument(reader, line);
|
tryChars = singleArgument(reader, line);
|
||||||
} else if ("REP".equals(firstWord)) {
|
} else if ("REP".equals(firstWord)) {
|
||||||
int count = Integer.parseInt(singleArgument(reader, line));
|
int count = parseNum(reader, line);
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
String[] parts = splitBySpace(reader, reader.readLine(), 3);
|
String[] parts = splitBySpace(reader, reader.readLine(), 3, Integer.MAX_VALUE);
|
||||||
repTable.add(new RepEntry(parts[1], parts[2]));
|
repTable.add(new RepEntry(parts[1], parts[2]));
|
||||||
}
|
}
|
||||||
} else if ("KEY".equals(firstWord)) {
|
} else if ("KEY".equals(firstWord)) {
|
||||||
|
@ -409,11 +409,11 @@ public class Dictionary {
|
||||||
} else if ("FORBIDDENWORD".equals(firstWord)) {
|
} else if ("FORBIDDENWORD".equals(firstWord)) {
|
||||||
forbiddenword = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
forbiddenword = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
} else if ("COMPOUNDMIN".equals(firstWord)) {
|
} else if ("COMPOUNDMIN".equals(firstWord)) {
|
||||||
compoundMin = Math.max(1, Integer.parseInt(singleArgument(reader, line)));
|
compoundMin = Math.max(1, parseNum(reader, line));
|
||||||
} else if ("COMPOUNDWORDMAX".equals(firstWord)) {
|
} else if ("COMPOUNDWORDMAX".equals(firstWord)) {
|
||||||
compoundMax = Math.max(1, Integer.parseInt(singleArgument(reader, line)));
|
compoundMax = Math.max(1, parseNum(reader, line));
|
||||||
} else if ("COMPOUNDRULE".equals(firstWord)) {
|
} else if ("COMPOUNDRULE".equals(firstWord)) {
|
||||||
compoundRules = parseCompoundRules(reader, Integer.parseInt(singleArgument(reader, line)));
|
compoundRules = parseCompoundRules(reader, parseNum(reader, line));
|
||||||
} else if ("COMPOUNDFLAG".equals(firstWord)) {
|
} else if ("COMPOUNDFLAG".equals(firstWord)) {
|
||||||
compoundFlag = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
compoundFlag = flagParsingStrategy.parseFlag(singleArgument(reader, line));
|
||||||
} else if ("COMPOUNDBEGIN".equals(firstWord)) {
|
} else if ("COMPOUNDBEGIN".equals(firstWord)) {
|
||||||
|
@ -437,7 +437,7 @@ public class Dictionary {
|
||||||
} else if ("SIMPLIFIEDTRIPLE".equals(firstWord)) {
|
} else if ("SIMPLIFIEDTRIPLE".equals(firstWord)) {
|
||||||
simplifiedTriple = true;
|
simplifiedTriple = true;
|
||||||
} else if ("CHECKCOMPOUNDPATTERN".equals(firstWord)) {
|
} else if ("CHECKCOMPOUNDPATTERN".equals(firstWord)) {
|
||||||
int count = Integer.parseInt(singleArgument(reader, line));
|
int count = parseNum(reader, line);
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
checkCompoundPatterns.add(
|
checkCompoundPatterns.add(
|
||||||
new CheckCompoundPattern(reader.readLine(), flagParsingStrategy, this));
|
new CheckCompoundPattern(reader.readLine(), flagParsingStrategy, this));
|
||||||
|
@ -481,16 +481,24 @@ public class Dictionary {
|
||||||
return underscore < 0 ? isoCode : isoCode.substring(0, underscore);
|
return underscore < 0 ? isoCode : isoCode.substring(0, underscore);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int parseNum(LineNumberReader reader, String line) throws ParseException {
|
||||||
|
return Integer.parseInt(splitBySpace(reader, line, 2, Integer.MAX_VALUE)[1]);
|
||||||
|
}
|
||||||
|
|
||||||
private String singleArgument(LineNumberReader reader, String line) throws ParseException {
|
private String singleArgument(LineNumberReader reader, String line) throws ParseException {
|
||||||
return splitBySpace(reader, line, 2)[1];
|
return splitBySpace(reader, line, 2)[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
private String[] splitBySpace(LineNumberReader reader, String line, int expectedParts)
|
private String[] splitBySpace(LineNumberReader reader, String line, int expectedParts)
|
||||||
throws ParseException {
|
throws ParseException {
|
||||||
|
return splitBySpace(reader, line, expectedParts, expectedParts);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String[] splitBySpace(LineNumberReader reader, String line, int minParts, int maxParts)
|
||||||
|
throws ParseException {
|
||||||
String[] parts = line.split("\\s+");
|
String[] parts = line.split("\\s+");
|
||||||
if (parts.length < expectedParts
|
if (parts.length < minParts || parts.length > maxParts && !parts[maxParts].startsWith("#")) {
|
||||||
|| parts.length > expectedParts && !parts[expectedParts].startsWith("#")) {
|
throw new ParseException("Invalid syntax: " + line, reader.getLineNumber());
|
||||||
throw new ParseException("Invalid syntax", reader.getLineNumber());
|
|
||||||
}
|
}
|
||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
@ -509,7 +517,7 @@ public class Dictionary {
|
||||||
Set<String> starting = new LinkedHashSet<>();
|
Set<String> starting = new LinkedHashSet<>();
|
||||||
Set<String> ending = new LinkedHashSet<>();
|
Set<String> ending = new LinkedHashSet<>();
|
||||||
Set<String> middle = new LinkedHashSet<>();
|
Set<String> middle = new LinkedHashSet<>();
|
||||||
int num = Integer.parseInt(singleArgument(reader, line));
|
int num = parseNum(reader, line);
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
String breakStr = singleArgument(reader, reader.readLine());
|
String breakStr = singleArgument(reader, reader.readLine());
|
||||||
if (breakStr.startsWith("^")) {
|
if (breakStr.startsWith("^")) {
|
||||||
|
@ -590,15 +598,8 @@ public class Dictionary {
|
||||||
|
|
||||||
for (int i = 0; i < numLines; i++) {
|
for (int i = 0; i < numLines; i++) {
|
||||||
String line = reader.readLine();
|
String line = reader.readLine();
|
||||||
String[] ruleArgs = line.split("\\s+");
|
|
||||||
|
|
||||||
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
|
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
|
||||||
// condition is optional
|
String[] ruleArgs = splitBySpace(reader, line, 4, Integer.MAX_VALUE);
|
||||||
if (ruleArgs.length < 4) {
|
|
||||||
throw new ParseException(
|
|
||||||
"The affix file contains a rule with less than four elements: " + line,
|
|
||||||
reader.getLineNumber());
|
|
||||||
}
|
|
||||||
|
|
||||||
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
|
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
|
||||||
String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2];
|
String strip = ruleArgs[2].equals("0") ? "" : ruleArgs[2];
|
||||||
|
@ -654,9 +655,11 @@ public class Dictionary {
|
||||||
"Too many patterns, please report this to dev@lucene.apache.org");
|
"Too many patterns, please report this to dev@lucene.apache.org");
|
||||||
}
|
}
|
||||||
seenPatterns.put(regex, patternIndex);
|
seenPatterns.put(regex, patternIndex);
|
||||||
CharacterRunAutomaton pattern =
|
try {
|
||||||
new CharacterRunAutomaton(new RegExp(regex, RegExp.NONE).toAutomaton());
|
patterns.add(new CharacterRunAutomaton(conditionRegexp(regex).toAutomaton()));
|
||||||
patterns.add(pattern);
|
} catch (IllegalArgumentException e) {
|
||||||
|
throw new IllegalArgumentException("On line " + reader.getLineNumber() + ": " + line, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Integer stripOrd = seenStrips.get(strip);
|
Integer stripOrd = seenStrips.get(strip);
|
||||||
|
@ -706,6 +709,17 @@ public class Dictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static RegExp conditionRegexp(String regex) {
|
||||||
|
try {
|
||||||
|
return new RegExp(regex, RegExp.NONE);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
if (e.getMessage().contains("expected ']'")) {
|
||||||
|
return conditionRegexp(regex + "]");
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
char affixData(int affixIndex, int offset) {
|
char affixData(int affixIndex, int offset) {
|
||||||
return affixData[affixIndex * 4 + offset];
|
return affixData[affixIndex * 4 + offset];
|
||||||
}
|
}
|
||||||
|
@ -752,6 +766,8 @@ public class Dictionary {
|
||||||
LineNumberReader reader = new LineNumberReader(new InputStreamReader(stream, streamCharset));
|
LineNumberReader reader = new LineNumberReader(new InputStreamReader(stream, streamCharset));
|
||||||
String line;
|
String line;
|
||||||
while ((line = reader.readLine()) != null) {
|
while ((line = reader.readLine()) != null) {
|
||||||
|
if (line.isBlank()) continue;
|
||||||
|
|
||||||
String firstWord = line.split("\\s")[0];
|
String firstWord = line.split("\\s")[0];
|
||||||
if ("SET".equals(firstWord)) {
|
if ("SET".equals(firstWord)) {
|
||||||
decoder = getDecoder(singleArgument(reader, line));
|
decoder = getDecoder(singleArgument(reader, line));
|
||||||
|
@ -767,11 +783,12 @@ public class Dictionary {
|
||||||
*
|
*
|
||||||
* @return {@code true} if the sequence matched and has been consumed.
|
* @return {@code true} if the sequence matched and has been consumed.
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("SameParameterValue")
|
||||||
private static boolean maybeConsume(BufferedInputStream stream, byte[] bytes) throws IOException {
|
private static boolean maybeConsume(BufferedInputStream stream, byte[] bytes) throws IOException {
|
||||||
stream.mark(bytes.length);
|
stream.mark(bytes.length);
|
||||||
for (int i = 0; i < bytes.length; i++) {
|
for (byte b : bytes) {
|
||||||
int nextByte = stream.read();
|
int nextByte = stream.read();
|
||||||
if (nextByte != (bytes[i] & 0xff)) { // covers EOF (-1) as well.
|
if (nextByte != (b & 0xff)) { // covers EOF (-1) as well.
|
||||||
stream.reset();
|
stream.reset();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1344,6 +1361,9 @@ public class Dictionary {
|
||||||
|
|
||||||
/** Abstraction of the process of parsing flags taken from the affix and dic files */
|
/** Abstraction of the process of parsing flags taken from the affix and dic files */
|
||||||
abstract static class FlagParsingStrategy {
|
abstract static class FlagParsingStrategy {
|
||||||
|
// we don't check the flag count, as Hunspell accepts longer sequences
|
||||||
|
// https://github.com/hunspell/hunspell/issues/707
|
||||||
|
static final boolean checkFlags = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the given String into a single flag
|
* Parses the given String into a single flag
|
||||||
|
@ -1353,7 +1373,7 @@ public class Dictionary {
|
||||||
*/
|
*/
|
||||||
char parseFlag(String rawFlag) {
|
char parseFlag(String rawFlag) {
|
||||||
char[] flags = parseFlags(rawFlag);
|
char[] flags = parseFlags(rawFlag);
|
||||||
if (flags.length != 1) {
|
if (checkFlags && flags.length != 1) {
|
||||||
throw new IllegalArgumentException("expected only one flag, got: " + rawFlag);
|
throw new IllegalArgumentException("expected only one flag, got: " + rawFlag);
|
||||||
}
|
}
|
||||||
return flags[0];
|
return flags[0];
|
||||||
|
@ -1406,7 +1426,8 @@ public class Dictionary {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int flag = Integer.parseInt(replacement);
|
int flag = Integer.parseInt(replacement);
|
||||||
if (flag == FLAG_UNSET || flag >= Character.MAX_VALUE) { // read default flags as well
|
if (flag >= Character.MAX_VALUE) { // read default flags as well
|
||||||
|
// accept 0 due to https://github.com/hunspell/hunspell/issues/708
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag);
|
"Num flags should be between 0 and " + DEFAULT_FLAGS + ", found " + flag);
|
||||||
}
|
}
|
||||||
|
@ -1428,28 +1449,21 @@ public class Dictionary {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public char[] parseFlags(String rawFlags) {
|
public char[] parseFlags(String rawFlags) {
|
||||||
if (rawFlags.length() == 0) {
|
if (checkFlags && rawFlags.length() % 2 == 1) {
|
||||||
return new char[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
StringBuilder builder = new StringBuilder();
|
|
||||||
if (rawFlags.length() % 2 == 1) {
|
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Invalid flags (should be even number of characters): " + rawFlags);
|
"Invalid flags (should be even number of characters): " + rawFlags);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < rawFlags.length(); i += 2) {
|
|
||||||
char f1 = rawFlags.charAt(i);
|
char[] flags = new char[rawFlags.length() / 2];
|
||||||
char f2 = rawFlags.charAt(i + 1);
|
for (int i = 0; i < flags.length; i++) {
|
||||||
|
char f1 = rawFlags.charAt(i * 2);
|
||||||
|
char f2 = rawFlags.charAt(i * 2 + 1);
|
||||||
if (f1 >= 256 || f2 >= 256) {
|
if (f1 >= 256 || f2 >= 256) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Invalid flags (LONG flags must be double ASCII): " + rawFlags);
|
"Invalid flags (LONG flags must be double ASCII): " + rawFlags);
|
||||||
}
|
}
|
||||||
char combined = (char) (f1 << 8 | f2);
|
flags[i] = (char) (f1 << 8 | f2);
|
||||||
builder.append(combined);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char[] flags = new char[builder.length()];
|
|
||||||
builder.getChars(0, builder.length(), flags, 0);
|
|
||||||
return flags;
|
return flags;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,11 +39,7 @@ import org.junit.Test;
|
||||||
public class TestDictionary extends LuceneTestCase {
|
public class TestDictionary extends LuceneTestCase {
|
||||||
|
|
||||||
public void testSimpleDictionary() throws Exception {
|
public void testSimpleDictionary() throws Exception {
|
||||||
InputStream affixStream = getClass().getResourceAsStream("simple.aff");
|
Dictionary dictionary = loadDictionary("simple.aff", "simple.dic");
|
||||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
|
|
||||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
|
||||||
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
||||||
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
||||||
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
||||||
|
@ -60,85 +56,44 @@ public class TestDictionary extends LuceneTestCase {
|
||||||
assertEquals(1, ordList.length);
|
assertEquals(1, ordList.length);
|
||||||
flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
||||||
assertEquals(1, flags.length);
|
assertEquals(1, flags.length);
|
||||||
|
|
||||||
affixStream.close();
|
|
||||||
dictStream.close();
|
|
||||||
tempDir.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCompressedDictionary() throws Exception {
|
public void testCompressedDictionary() throws Exception {
|
||||||
InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
|
Dictionary dictionary = loadDictionary("compressed.aff", "compressed.dic");
|
||||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
|
||||||
|
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
|
||||||
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
||||||
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
||||||
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
||||||
BytesRef ref = new BytesRef();
|
BytesRef ref = new BytesRef();
|
||||||
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
||||||
assertEquals(1, flags.length);
|
assertEquals(1, flags.length);
|
||||||
|
|
||||||
affixStream.close();
|
|
||||||
dictStream.close();
|
|
||||||
tempDir.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCompressedBeforeSetDictionary() throws Exception {
|
public void testCompressedBeforeSetDictionary() throws Exception {
|
||||||
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
|
Dictionary dictionary = loadDictionary("compressed-before-set.aff", "compressed.dic");
|
||||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
|
|
||||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
|
||||||
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
||||||
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
||||||
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
||||||
BytesRef ref = new BytesRef();
|
BytesRef ref = new BytesRef();
|
||||||
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
||||||
assertEquals(1, flags.length);
|
assertEquals(1, flags.length);
|
||||||
|
|
||||||
affixStream.close();
|
|
||||||
dictStream.close();
|
|
||||||
tempDir.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCompressedEmptyAliasDictionary() throws Exception {
|
public void testCompressedEmptyAliasDictionary() throws Exception {
|
||||||
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
|
Dictionary dictionary = loadDictionary("compressed-empty-alias.aff", "compressed.dic");
|
||||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
|
|
||||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
|
||||||
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
assertEquals(3, dictionary.lookupSuffix(new char[] {'e'}).length);
|
||||||
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
assertEquals(1, dictionary.lookupPrefix(new char[] {'s'}).length);
|
||||||
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
IntsRef ordList = dictionary.lookupWord(new char[] {'o', 'l', 'r'}, 0, 3);
|
||||||
BytesRef ref = new BytesRef();
|
BytesRef ref = new BytesRef();
|
||||||
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
char[] flags = dictionary.decodeFlags(ordList.ints[0], ref);
|
||||||
assertEquals(1, flags.length);
|
assertEquals(1, flags.length);
|
||||||
|
|
||||||
affixStream.close();
|
|
||||||
dictStream.close();
|
|
||||||
tempDir.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// malformed rule causes ParseException
|
// malformed rule causes ParseException
|
||||||
public void testInvalidData() throws Exception {
|
public void testInvalidData() {
|
||||||
InputStream affixStream = getClass().getResourceAsStream("broken.aff");
|
|
||||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
|
|
||||||
ParseException expected =
|
ParseException expected =
|
||||||
expectThrows(
|
expectThrows(ParseException.class, () -> loadDictionary("broken.aff", "simple.dic"));
|
||||||
ParseException.class,
|
assertTrue(expected.getMessage().startsWith("Invalid syntax"));
|
||||||
() -> new Dictionary(tempDir, "dictionary", affixStream, dictStream));
|
|
||||||
assertTrue(
|
|
||||||
expected
|
|
||||||
.getMessage()
|
|
||||||
.startsWith("The affix file contains a rule with less than four elements"));
|
|
||||||
assertEquals(24, expected.getErrorOffset());
|
assertEquals(24, expected.getErrorOffset());
|
||||||
|
|
||||||
affixStream.close();
|
|
||||||
dictStream.close();
|
|
||||||
tempDir.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUsingFlagsBeforeFlagDirective() throws IOException, ParseException {
|
public void testUsingFlagsBeforeFlagDirective() throws IOException, ParseException {
|
||||||
|
@ -155,20 +110,21 @@ public class TestDictionary extends LuceneTestCase {
|
||||||
assertEquals(42, dictionary.keepcase);
|
assertEquals(42, dictionary.keepcase);
|
||||||
}
|
}
|
||||||
|
|
||||||
// malformed flags causes ParseException
|
public void testForgivableErrors() throws Exception {
|
||||||
public void testInvalidFlags() throws Exception {
|
Dictionary dictionary = loadDictionary("forgivable-errors.aff", "simple.dic");
|
||||||
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
|
assertEquals(1, dictionary.repTable.size());
|
||||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
assertEquals(2, dictionary.compoundMax);
|
||||||
Directory tempDir = getDirectory();
|
|
||||||
|
|
||||||
Exception expected =
|
loadDictionary("forgivable-errors-long.aff", "single-word.dic");
|
||||||
expectThrows(
|
loadDictionary("forgivable-errors-num.aff", "single-word.dic");
|
||||||
Exception.class, () -> new Dictionary(tempDir, "dictionary", affixStream, dictStream));
|
}
|
||||||
assertTrue(expected.getMessage().startsWith("expected only one flag"));
|
|
||||||
|
|
||||||
affixStream.close();
|
private Dictionary loadDictionary(String aff, String dic) throws IOException, ParseException {
|
||||||
dictStream.close();
|
try (InputStream affixStream = getClass().getResourceAsStream(aff);
|
||||||
tempDir.close();
|
InputStream dicStream = getClass().getResourceAsStream(dic);
|
||||||
|
Directory tempDir = getDirectory()) {
|
||||||
|
return new Dictionary(tempDir, "dictionary", affixStream, dicStream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CloseCheckInputStream extends FilterInputStream {
|
private static class CloseCheckInputStream extends FilterInputStream {
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
SET UTF-8
|
|
||||||
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
|
|
||||||
|
|
||||||
SFX A Y 3
|
|
||||||
SFX A 0 e n
|
|
||||||
SFX A 0 e t
|
|
||||||
SFX A 0 e h
|
|
||||||
|
|
||||||
SFX C Y 2
|
|
||||||
SFX C 0 d/C c
|
|
||||||
SFX C 0 c b
|
|
||||||
|
|
||||||
SFX D Y 1
|
|
||||||
SFX D 0 s o
|
|
||||||
|
|
||||||
SFX E Y 1
|
|
||||||
SFX E 0 d o
|
|
||||||
|
|
||||||
# broken, the flag has too much in it
|
|
||||||
PFX B0 Y 1
|
|
||||||
PFX B0 0 s o
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
FLAG long
|
||||||
|
|
||||||
|
SFX A10 Y 1
|
||||||
|
SFX A10 nout l .
|
|
@ -0,0 +1,4 @@
|
||||||
|
FLAG num
|
||||||
|
|
||||||
|
SFX 0 Y 1
|
||||||
|
SFX 0 nout l .
|
|
@ -0,0 +1,9 @@
|
||||||
|
REP 1
|
||||||
|
REP foo bar goo doo zoo
|
||||||
|
|
||||||
|
COMPOUNDWORDMAX 2 y
|
||||||
|
|
||||||
|
KEEPCASE Aa
|
||||||
|
|
||||||
|
SFX A Y 1
|
||||||
|
SFX A nout l [aeiouyáéíóúýůěr][^aeiouyáéíóúýůěrl][^aeiouy
|
|
@ -0,0 +1,2 @@
|
||||||
|
1
|
||||||
|
foo
|
Loading…
Reference in New Issue