LUCENE-5517: stricter parsing for hunspell parseFlag

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1576389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-03-11 15:52:38 +00:00
parent 03de125b85
commit 2c98dd5b32
5 changed files with 45 additions and 4 deletions

View File

@ -822,7 +822,11 @@ public class Dictionary {
* @return Parsed flag * @return Parsed flag
*/ */
char parseFlag(String rawFlag) { char parseFlag(String rawFlag) {
return parseFlags(rawFlag)[0]; char flags[] = parseFlags(rawFlag);
if (flags.length != 1) {
throw new IllegalArgumentException("expected only one flag, got: " + rawFlag);
}
return flags[0];
} }
/** /**

View File

@ -47,7 +47,7 @@ public class TestAllDictionaries extends LuceneTestCase {
"bg_BG.zip", "bg_BG.dic", "bg_BG.aff", "bg_BG.zip", "bg_BG.dic", "bg_BG.aff",
"ca_ANY.zip", "catalan.dic", "catalan.aff", "ca_ANY.zip", "catalan.dic", "catalan.aff",
"ca_ES.zip", "ca_ES.dic", "ca_ES.aff", "ca_ES.zip", "ca_ES.dic", "ca_ES.aff",
"cop_EG.zip", "cop_EG.dic", "cop_EG.aff", // BUG: broken flag "cop_EG.zip", "cop_EG.dic", "cop_EG.aff",
"cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff", "cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff",
"cy_GB.zip", "cy_GB.dic", "cy_GB.aff", "cy_GB.zip", "cy_GB.dic", "cy_GB.aff",
"da_DK.zip", "da_DK.dic", "da_DK.aff", "da_DK.zip", "da_DK.dic", "da_DK.aff",

View File

@ -61,7 +61,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff", "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
"chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff", "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
"corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff", "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
"corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff", //BUG: broken flags "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
"corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff", "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
"croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff", "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
"croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff", "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
@ -149,7 +149,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff", //BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff", "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff", "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff", //BUG: missing FLAG declaration "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
"turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff", "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff", "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff", "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",

View File

@ -131,6 +131,22 @@ public class TestDictionary extends LuceneTestCase {
dictStream.close(); dictStream.close();
} }
// malformed flags causes ParseException
public void testInvalidFlags() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
try {
new Dictionary(affixStream, dictStream);
fail("didn't get expected exception");
} catch (Exception expected) {
assertTrue(expected.getMessage().startsWith("expected only one flag"));
}
affixStream.close();
dictStream.close();
}
private class CloseCheckInputStream extends FilterInputStream { private class CloseCheckInputStream extends FilterInputStream {
private boolean closed = false; private boolean closed = false;

View File

@ -0,0 +1,21 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
SFX A Y 3
SFX A 0 e n
SFX A 0 e t
SFX A 0 e h
SFX C Y 2
SFX C 0 d/C c
SFX C 0 c b
SFX D Y 1
SFX D 0 s o
SFX E Y 1
SFX E 0 d o
# broken, the flag has too much in it
PFX B0 Y 1
PFX B0 0 s o