mirror of https://github.com/apache/lucene.git
SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1499164 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
42ca89d233
commit
756e541620
|
@ -383,12 +383,9 @@ public class HunspellDictionary {
|
|||
}
|
||||
}
|
||||
|
||||
List<HunspellWord> entries = words.get(entry);
|
||||
if (entries == null) {
|
||||
entries = new ArrayList<HunspellWord>();
|
||||
words.put(entry, entries);
|
||||
}
|
||||
List<HunspellWord> entries = new ArrayList<HunspellWord>();
|
||||
entries.add(wordForm);
|
||||
words.put(entry, entries);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,9 @@ package org.apache.lucene.analysis.hunspell;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Assert;
|
||||
|
@ -131,11 +133,29 @@ public class HunspellDictionaryTest extends LuceneTestCase {
|
|||
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
|
||||
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
|
||||
assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
|
||||
assertEquals("Wrong number of flags for lucen", 1, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
|
||||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHunspellDictionary_multipleDictWithOverride() throws IOException, ParseException {
|
||||
InputStream affixStream = getClass().getResourceAsStream("test.aff");
|
||||
List<InputStream> dictStreams = new ArrayList<InputStream>();
|
||||
dictStreams.add(getClass().getResourceAsStream("test.dic"));
|
||||
dictStreams.add(getClass().getResourceAsStream("testOverride.dic"));
|
||||
|
||||
HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStreams, TEST_VERSION_CURRENT, false);
|
||||
assertEquals("Wrong number of flags for lucen", 3, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
|
||||
assertEquals("Wrong number of flags for bar", 1, dictionary.lookupWord(new char[]{'b', 'a', 'r'}, 0, 3).get(0).getFlags().length);
|
||||
|
||||
affixStream.close();
|
||||
for(InputStream dstream : dictStreams) {
|
||||
dstream.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompressedHunspellDictionary_loadDicAff() throws IOException, ParseException {
|
||||
InputStream affixStream = getClass().getResourceAsStream("testCompressed.aff");
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
6
|
||||
8
|
||||
lucen/A
|
||||
lucene
|
||||
mahout/A
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
2
|
||||
lucen/ABC
|
||||
bar/A
|
|
@ -240,6 +240,8 @@ Bug Fixes
|
|||
* SOLR-3369: shards.tolerant=true is broken for group queries
|
||||
(Russell Black, Martijn van Groningen, Jabouille jean Charles, Ryan McKinley via shalin)
|
||||
|
||||
* SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries (janhoy)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
Loading…
Reference in New Issue