mirror of https://github.com/apache/lucene.git
SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa92c15202
commit
5a1e1bf4c8
|
@ -94,8 +94,8 @@ New Features
|
|||
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
|
||||
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
|
||||
|
||||
* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
|
||||
to retrieve correction candidates directly from the term dictionary using
|
||||
* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
|
||||
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
|
||||
Levenshtein automata. (James Dyer, rmuir)
|
||||
|
||||
* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
|
||||
|
@ -256,6 +256,9 @@ Other Changes
|
|||
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
|
||||
suggester APIs and implementations. (rmuir)
|
||||
|
||||
* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
|
||||
(James Dyer via rmuir)
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
|
||||
if (extendedResults) {
|
||||
Integer o = origVsFreq.get(original);
|
||||
if (o != null) result.add(token, o);
|
||||
if (o != null) result.addFrequency(token, o);
|
||||
for (SuggestWord word : suggestions)
|
||||
result.add(token, word.string, word.freq);
|
||||
} else {
|
||||
|
|
|
@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
|
||||
if (options.extendedResults == true && reader != null && field != null) {
|
||||
term = term.createTerm(tokenText);
|
||||
result.add(token, reader.docFreq(term));
|
||||
result.addFrequency(token, reader.docFreq(term));
|
||||
int countLimit = Math.min(options.count, suggestions.length);
|
||||
if(countLimit>0)
|
||||
{
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance;
|
|||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -179,11 +180,14 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
|||
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
|
||||
|
||||
for (Token token : options.tokens) {
|
||||
SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()),
|
||||
Term term = new Term(field, token.toString());
|
||||
SuggestWord[] suggestions = checker.suggestSimilar(term,
|
||||
options.count, options.reader, options.onlyMorePopular, accuracy);
|
||||
for (SuggestWord suggestion : suggestions)
|
||||
result.addFrequency(token, options.reader.docFreq(term));
|
||||
for (SuggestWord suggestion : suggestions) {
|
||||
result.add(token, suggestion.string, suggestion.freq);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,7 +66,13 @@ public class SpellingResult {
|
|||
}
|
||||
}
|
||||
|
||||
public void add(Token token, int docFreq) {
|
||||
/**
|
||||
* Adds an original token with its document frequency
|
||||
*
|
||||
* @param token original token
|
||||
* @param docFreq original token's document frequency
|
||||
*/
|
||||
public void addFrequency(Token token, int docFreq) {
|
||||
if (tokenFrequency == null) {
|
||||
tokenFrequency = new LinkedHashMap<Token, Integer>();
|
||||
}
|
||||
|
|
|
@ -49,6 +49,12 @@ Config for testing spellcheck component
|
|||
<str name="spellcheckIndexDir">spellchecker1</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
<lst name="spellchecker">
|
||||
<str name="name">direct</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<int name="minQueryLength">3</int>
|
||||
<str name="field">teststop</str>
|
||||
</lst>
|
||||
<lst name="spellchecker">
|
||||
<str name="name">threshold</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
|
|
|
@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
public void testThresholdTokenFrequency() throws Exception {
|
||||
|
||||
//"document" is in 2 documents but "another" is only in 1.
|
||||
//So with a threshold of 15%, "another" is absent from the dictionary
|
||||
//So with a threshold of 29%, "another" is absent from the dictionary
|
||||
//while "document" is present.
|
||||
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
||||
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
|
||||
);
|
||||
|
||||
//TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this???
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
||||
,"/spellcheck/suggestions/[1]/suggestion==['document']]"
|
||||
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
|
||||
);
|
||||
|
||||
//TODO: how do we make this into a 1-liner using "assertQ()" ???
|
||||
|
@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||
assertTrue(suggestions.get("suggestion")==null);
|
||||
|
||||
//TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK?
|
||||
//assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
|
||||
assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,10 +22,14 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.SpellCheckComponent;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.util.TestHarness;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml","schema.xml");
|
||||
initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
|
||||
//Index something with a title
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
|
||||
assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
|
||||
assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
|
||||
assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo")));
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
queryConverter = new SimpleQueryConverter();
|
||||
queryConverter.init(new NamedList());
|
||||
|
@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||
searcher.decref();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnlyMorePopularWithExtendedResults() throws Exception {
|
||||
assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue