mirror of https://github.com/apache/lucene.git
LUCENE-5517: stricter parsing for hunspell parseFlag
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1576389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
03de125b85
commit
2c98dd5b32
|
@ -822,7 +822,11 @@ public class Dictionary {
|
|||
* @return Parsed flag
|
||||
*/
|
||||
char parseFlag(String rawFlag) {
|
||||
return parseFlags(rawFlag)[0];
|
||||
char flags[] = parseFlags(rawFlag);
|
||||
if (flags.length != 1) {
|
||||
throw new IllegalArgumentException("expected only one flag, got: " + rawFlag);
|
||||
}
|
||||
return flags[0];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -47,7 +47,7 @@ public class TestAllDictionaries extends LuceneTestCase {
|
|||
"bg_BG.zip", "bg_BG.dic", "bg_BG.aff",
|
||||
"ca_ANY.zip", "catalan.dic", "catalan.aff",
|
||||
"ca_ES.zip", "ca_ES.dic", "ca_ES.aff",
|
||||
"cop_EG.zip", "cop_EG.dic", "cop_EG.aff",
|
||||
// BUG: broken flag "cop_EG.zip", "cop_EG.dic", "cop_EG.aff",
|
||||
"cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff",
|
||||
"cy_GB.zip", "cy_GB.dic", "cy_GB.aff",
|
||||
"da_DK.zip", "da_DK.dic", "da_DK.aff",
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
|
|||
"ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff",
|
||||
"chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff",
|
||||
"corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionaries/gl_ES.aff",
|
||||
"corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
|
||||
//BUG: broken flags "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi", "dictionaries/ia-ia.dic", "dictionaries/ia-ia.aff",
|
||||
"corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff",
|
||||
"croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff",
|
||||
"croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff",
|
||||
|
@ -149,7 +149,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
|
|||
//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
|
||||
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
|
||||
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
|
||||
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
|
||||
//BUG: missing FLAG declaration "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
|
||||
"turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
|
||||
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
|
||||
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
|
||||
|
|
|
@ -131,6 +131,22 @@ public class TestDictionary extends LuceneTestCase {
|
|||
dictStream.close();
|
||||
}
|
||||
|
||||
// malformed flags causes ParseException
|
||||
public void testInvalidFlags() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
||||
|
||||
try {
|
||||
new Dictionary(affixStream, dictStream);
|
||||
fail("didn't get expected exception");
|
||||
} catch (Exception expected) {
|
||||
assertTrue(expected.getMessage().startsWith("expected only one flag"));
|
||||
}
|
||||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
}
|
||||
|
||||
private class CloseCheckInputStream extends FilterInputStream {
|
||||
private boolean closed = false;
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
SET UTF-8
|
||||
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
|
||||
SFX A Y 3
|
||||
SFX A 0 e n
|
||||
SFX A 0 e t
|
||||
SFX A 0 e h
|
||||
|
||||
SFX C Y 2
|
||||
SFX C 0 d/C c
|
||||
SFX C 0 c b
|
||||
|
||||
SFX D Y 1
|
||||
SFX D 0 s o
|
||||
|
||||
SFX E Y 1
|
||||
SFX E 0 d o
|
||||
|
||||
# broken, the flag has too much in it
|
||||
PFX B0 Y 1
|
||||
PFX B0 0 s o
|
Loading…
Reference in New Issue