SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499164 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jan Høydahl 2013-07-03 00:36:19 +00:00
parent 42ca89d233
commit 756e541620
5 changed files with 29 additions and 7 deletions

View File

@ -383,12 +383,9 @@ public class HunspellDictionary {
}
}
List<HunspellWord> entries = words.get(entry);
if (entries == null) {
entries = new ArrayList<HunspellWord>();
words.put(entry, entries);
}
List<HunspellWord> entries = new ArrayList<HunspellWord>();
entries.add(wordForm);
words.put(entry, entries);
}
}

View File

@ -20,7 +20,9 @@ package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Assert;
@ -131,11 +133,29 @@ public class HunspellDictionaryTest extends LuceneTestCase {
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
assertEquals("Wrong number of flags for lucen", 1, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
affixStream.close();
dictStream.close();
}
/**
 * Builds a {@code HunspellDictionary} from test.aff plus two dictionary streams
 * (test.dic followed by testOverride.dic) and checks the number of flags on the
 * first entry returned for "lucen" (expected 3) and for "bar" (expected 1).
 *
 * @throws IOException if a resource stream cannot be read or closed
 * @throws ParseException if the affix or dictionary data cannot be parsed
 */
@Test
public void testHunspellDictionary_multipleDictWithOverride() throws IOException, ParseException {
  InputStream affixStream = getClass().getResourceAsStream("test.aff");
  List<InputStream> dictStreams = new ArrayList<InputStream>();
  try {
    dictStreams.add(getClass().getResourceAsStream("test.dic"));
    dictStreams.add(getClass().getResourceAsStream("testOverride.dic"));
    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStreams, TEST_VERSION_CURRENT, false);
    assertEquals("Wrong number of flags for lucen", 3, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
    assertEquals("Wrong number of flags for bar", 1, dictionary.lookupWord(new char[]{'b', 'a', 'r'}, 0, 3).get(0).getFlags().length);
  } finally {
    // Release the streams even when construction or an assertion fails;
    // previously they were only closed on the success path.
    try {
      // getResourceAsStream returns null for a missing resource, so guard the close.
      if (affixStream != null) {
        affixStream.close();
      }
    } finally {
      for (InputStream dstream : dictStreams) {
        if (dstream != null) {
          dstream.close();
        }
      }
    }
  }
}
@Test
public void testCompressedHunspellDictionary_loadDicAff() throws IOException, ParseException {
InputStream affixStream = getClass().getResourceAsStream("testCompressed.aff");

View File

@ -1,4 +1,4 @@
6
8
lucen/A
lucene
mahout/A

View File

@ -0,0 +1,3 @@
2
lucen/ABC
bar/A

View File

@ -240,6 +240,8 @@ Bug Fixes
* SOLR-3369: shards.tolerant=true is broken for group queries
(Russell Black, Martijn van Groningen, Jabouille jean Charles, Ryan McKinley via shalin)
* SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries (janhoy)
Optimizations
----------------------