SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-06-07 22:48:41 +00:00
parent aa92c15202
commit 5a1e1bf4c8
8 changed files with 47 additions and 14 deletions

View File

@ -94,8 +94,8 @@ New Features
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
to retrieve correction candidates directly from the term dictionary using
* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
levenshtein automata. (James Dyer, rmuir)
* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
@ -256,6 +256,9 @@ Other Changes
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
suggester APIs and implementations. (rmuir)
* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
(James Dyer via rmuir)
Documentation
----------------------

View File

@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
if (extendedResults) {
Integer o = origVsFreq.get(original);
if (o != null) result.add(token, o);
if (o != null) result.addFrequency(token, o);
for (SuggestWord word : suggestions)
result.add(token, word.string, word.freq);
} else {

View File

@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
if (options.extendedResults == true && reader != null && field != null) {
term = term.createTerm(tokenText);
result.add(token, reader.docFreq(term));
result.addFrequency(token, reader.docFreq(term));
int countLimit = Math.min(options.count, suggestions.length);
if(countLimit>0)
{

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
@ -179,10 +180,13 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
for (Token token : options.tokens) {
SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()),
Term term = new Term(field, token.toString());
SuggestWord[] suggestions = checker.suggestSimilar(term,
options.count, options.reader, options.onlyMorePopular, accuracy);
for (SuggestWord suggestion : suggestions)
result.addFrequency(token, options.reader.docFreq(term));
for (SuggestWord suggestion : suggestions) {
result.add(token, suggestion.string, suggestion.freq);
}
}
return result;
}

View File

@ -66,7 +66,13 @@ public class SpellingResult {
}
}
public void add(Token token, int docFreq) {
/**
* Adds an original token with its document frequency
*
* @param token original token
* @param docFreq original token's document frequency
*/
public void addFrequency(Token token, int docFreq) {
if (tokenFrequency == null) {
tokenFrequency = new LinkedHashMap<Token, Integer>();
}

View File

@ -49,6 +49,12 @@ Config for testing spellcheck component
<str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">true</str>
</lst>
<lst name="spellchecker">
<str name="name">direct</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<int name="minQueryLength">3</int>
<str name="field">teststop</str>
</lst>
<lst name="spellchecker">
<str name="name">threshold</str>
<str name="field">lowerfilt</str>

View File

@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
public void testThresholdTokenFrequency() throws Exception {
//"document" is in 2 documents but "another" is only in 1.
//So with a threshold of 15%, "another" is absent from the dictionary
//So with a threshold of 29%, "another" is absent from the dictionary
//while "document" is present.
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
);
//TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this???
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==['document']]"
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
);
//TODO: how do we make this into a 1-liner using "assertQ()" ???
@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
suggestions = (NamedList) spellCheck.get("suggestions");
assertTrue(suggestions.get("suggestion")==null);
//TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK?
//assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
}
}

View File

@ -22,10 +22,14 @@ import java.util.Map;
import org.apache.lucene.analysis.Token;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestHarness;
import org.junit.BeforeClass;
import org.junit.Test;
@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema.xml");
initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
//Index something with a title
assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo")));
assertNull(h.validateUpdate(commit()));
queryConverter = new SimpleQueryConverter();
queryConverter.init(new NamedList());
@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue("suggestions is not null and it should be", suggestions == null);
searcher.decref();
}
@Test
public void testOnlyMorePopularWithExtendedResults() throws Exception {
assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2",
"//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'"
);
}
}