From a919f2df59f30857e07266efe1097795f957ff28 Mon Sep 17 00:00:00 2001 From: Peter Gromov Date: Thu, 11 Feb 2021 09:13:34 +0100 Subject: [PATCH] LUCENE-9761: Hunspell: check that FLAG and SET don't occur too far in the file, cleanup (#2348) Thank you! --- .../lucene/analysis/hunspell/Dictionary.java | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 99f60b698d9..e9a638441c1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -233,7 +233,7 @@ public class Dictionary { try (BufferedInputStream affixStream = new BufferedInputStream(affix, MAX_PROLOGUE_SCAN_WINDOW) { @Override - public void close() throws IOException { + public void close() { // TODO: maybe we should consume and close it? Why does it need to stay open? // Don't close the affix stream as per javadoc. } @@ -466,11 +466,12 @@ public class Dictionary { new CheckCompoundPattern(reader.readLine(), flagParsingStrategy, this)); } } else if ("SET".equals(firstWord)) { - // We could add some sanity-checking whether set command is identical to what was - // parsed in readConfig. This would handle cases of flags too far in the file or - // duplicated (both are incorrect, I assume). + checkCriticalDirectiveSame( + "SET", reader, decoder.charset(), getDecoder(singleArgument(reader, line)).charset()); } else if ("FLAG".equals(firstWord)) { - // Similar for FLAG. + FlagParsingStrategy strategy = getFlagParsingStrategy(line, decoder.charset()); + checkCriticalDirectiveSame( + "FLAG", reader, flagParsingStrategy.getClass(), strategy.getClass()); } } @@ -494,6 +495,19 @@ public class Dictionary { stripOffsets[currentIndex] = currentOffset; } + private void checkCriticalDirectiveSame( + String directive, LineNumberReader reader, Object expected, Object actual) + throws ParseException { + if (!expected.equals(actual)) { + throw new ParseException( + directive + + " directive should occur at most once, and in the first " + + MAX_PROLOGUE_SCAN_WINDOW + + " bytes of the *.aff file", + reader.getLineNumber()); + } + } + private List parseMapEntry(LineNumberReader reader, String line) throws ParseException { String unparsed = firstArgument(reader, line); List mapEntry = new ArrayList<>();