From 2c98dd5b320bdf829dac3ab405d81323706a472b Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 11 Mar 2014 15:52:38 +0000 Subject: [PATCH] LUCENE-5517: stricter parsing for hunspell parseFlag git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1576389 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/analysis/hunspell/Dictionary.java | 6 +++++- .../hunspell/TestAllDictionaries.java | 2 +- .../hunspell/TestAllDictionaries2.java | 4 ++-- .../analysis/hunspell/TestDictionary.java | 16 ++++++++++++++ .../lucene/analysis/hunspell/broken-flags.aff | 21 +++++++++++++++++++ 5 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken-flags.aff diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 0c8ad44cf16..733af05e2a0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -822,7 +822,11 @@ public class Dictionary { * @return Parsed flag */ char parseFlag(String rawFlag) { - return parseFlags(rawFlag)[0]; + char flags[] = parseFlags(rawFlag); + if (flags.length != 1) { + throw new IllegalArgumentException("expected only one flag, got: " + rawFlag); + } + return flags[0]; } /** diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java index 3322eb109a6..256d6215278 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java @@ -47,7 +47,7 @@ public class TestAllDictionaries extends LuceneTestCase { "bg_BG.zip", "bg_BG.dic", "bg_BG.aff", "ca_ANY.zip", "catalan.dic", "catalan.aff", "ca_ES.zip", "ca_ES.dic", "ca_ES.aff", - "cop_EG.zip", "cop_EG.dic", "cop_EG.aff", +// BUG: broken flag "cop_EG.zip", "cop_EG.dic", "cop_EG.aff", "cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff", "cy_GB.zip", "cy_GB.dic", "cy_GB.aff", "da_DK.zip", "da_DK.dic", "da_DK.aff", diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java index 21d7ad62c36..85ed12892be 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java @@ -61,7 +61,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff", "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff", "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff", - "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff", +//BUG: broken flags "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff", "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff", "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff", "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff", @@ -149,7 +149,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { //BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff", "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff", "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff", - "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff", +//BUG: missing FLAG declaration "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff", "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff", "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff", "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff", diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index a653ddb36a2..030181e89da 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -131,6 +131,22 @@ public class TestDictionary extends LuceneTestCase { dictStream.close(); } + // malformed flags causes ParseException + public void testInvalidFlags() throws Exception { + InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff"); + InputStream dictStream = getClass().getResourceAsStream("simple.dic"); + + try { + new Dictionary(affixStream, dictStream); + fail("didn't get expected exception"); + } catch (Exception expected) { + assertTrue(expected.getMessage().startsWith("expected only one flag")); + } + + affixStream.close(); + dictStream.close(); + } + private class CloseCheckInputStream extends FilterInputStream { private boolean closed = false; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken-flags.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken-flags.aff new file mode 100644 index 00000000000..0c189c845ce --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken-flags.aff @@ -0,0 +1,21 @@ +SET UTF-8 +TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + +SFX A Y 3 +SFX A 0 e n +SFX A 0 e t +SFX A 0 e h + +SFX C Y 2 +SFX C 0 d/C c +SFX C 0 c b + +SFX D Y 1 +SFX D 0 s o + +SFX E Y 1 +SFX E 0 d o + +# broken, the flag has too much in it +PFX B0 Y 1 +PFX B0 0 s o