SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-06-07 22:48:41 +00:00
parent aa92c15202
commit 5a1e1bf4c8
8 changed files with 47 additions and 14 deletions

View File

@ -94,8 +94,8 @@ New Features
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting * SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan) (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker * LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
to retrieve correction candidates directly from the term dictionary using DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
levenshtein automata. (James Dyer, rmuir) levenshtein automata. (James Dyer, rmuir)
* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper, * SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
@ -256,6 +256,9 @@ Other Changes
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
suggester APIs and implementations. (rmuir) suggester APIs and implementations. (rmuir)
* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
(James Dyer via rmuir)
Documentation Documentation
---------------------- ----------------------

View File

@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
if (extendedResults) { if (extendedResults) {
Integer o = origVsFreq.get(original); Integer o = origVsFreq.get(original);
if (o != null) result.add(token, o); if (o != null) result.addFrequency(token, o);
for (SuggestWord word : suggestions) for (SuggestWord word : suggestions)
result.add(token, word.string, word.freq); result.add(token, word.string, word.freq);
} else { } else {

View File

@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
if (options.extendedResults == true && reader != null && field != null) { if (options.extendedResults == true && reader != null && field != null) {
term = term.createTerm(tokenText); term = term.createTerm(tokenText);
result.add(token, reader.docFreq(term)); result.addFrequency(token, reader.docFreq(term));
int countLimit = Math.min(options.count, suggestions.length); int countLimit = Math.min(options.count, suggestions.length);
if(countLimit>0) if(countLimit>0)
{ {

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
@ -179,10 +180,13 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
for (Token token : options.tokens) { for (Token token : options.tokens) {
SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()), Term term = new Term(field, token.toString());
SuggestWord[] suggestions = checker.suggestSimilar(term,
options.count, options.reader, options.onlyMorePopular, accuracy); options.count, options.reader, options.onlyMorePopular, accuracy);
for (SuggestWord suggestion : suggestions) result.addFrequency(token, options.reader.docFreq(term));
result.add(token, suggestion.string, suggestion.freq); for (SuggestWord suggestion : suggestions) {
result.add(token, suggestion.string, suggestion.freq);
}
} }
return result; return result;
} }

View File

@ -66,7 +66,13 @@ public class SpellingResult {
} }
} }
public void add(Token token, int docFreq) { /**
* Adds an original token with its document frequency
*
* @param token original token
* @param docFreq original token's document frequency
*/
public void addFrequency(Token token, int docFreq) {
if (tokenFrequency == null) { if (tokenFrequency == null) {
tokenFrequency = new LinkedHashMap<Token, Integer>(); tokenFrequency = new LinkedHashMap<Token, Integer>();
} }

View File

@ -49,6 +49,12 @@ Config for testing spellcheck component
<str name="spellcheckIndexDir">spellchecker1</str> <str name="spellcheckIndexDir">spellchecker1</str>
<str name="buildOnCommit">true</str> <str name="buildOnCommit">true</str>
</lst> </lst>
<lst name="spellchecker">
<str name="name">direct</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<int name="minQueryLength">3</int>
<str name="field">teststop</str>
</lst>
<lst name="spellchecker"> <lst name="spellchecker">
<str name="name">threshold</str> <str name="name">threshold</str>
<str name="field">lowerfilt</str> <str name="field">lowerfilt</str>

View File

@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
public void testThresholdTokenFrequency() throws Exception { public void testThresholdTokenFrequency() throws Exception {
//"document" is in 2 documents but "another" is only in 1. //"document" is in 2 documents but "another" is only in 1.
//So with a threshold of 15%, "another" is absent from the dictionary //So with a threshold of 29%, "another" is absent from the dictionary
//while "document" is present. //while "document" is present.
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true") assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]" ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
); );
//TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this???
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true") assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==['document']]" ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
); );
//TODO: how do we make this into a 1-liner using "assertQ()" ??? //TODO: how do we make this into a 1-liner using "assertQ()" ???
@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
suggestions = (NamedList) spellCheck.get("suggestions"); suggestions = (NamedList) spellCheck.get("suggestions");
assertTrue(suggestions.get("suggestion")==null); assertTrue(suggestions.get("suggestion")==null);
//TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK? assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
//assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
} }
} }

View File

@ -22,10 +22,14 @@ import java.util.Map;
import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Token;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestHarness;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema.xml"); initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
//Index something with a title //Index something with a title
assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title"))); assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs."))); assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr"))); assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo"))); assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo")));
assertNull(h.validateUpdate(commit())); assertNull(h.validateUpdate(commit()));
queryConverter = new SimpleQueryConverter(); queryConverter = new SimpleQueryConverter();
queryConverter.init(new NamedList()); queryConverter.init(new NamedList());
@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
assertTrue("suggestions is not null and it should be", suggestions == null); assertTrue("suggestions is not null and it should be", suggestions == null);
searcher.decref(); searcher.decref();
} }
/**
 * Regression test for SOLR-2576: sends the query "teststop:fox" to the
 * "spellCheckCompRH" handler using the "direct" dictionary with both
 * extended results and onlyMorePopular set to "true".
 *
 * The XPath assertions expect the response to carry the original token's
 * frequency (origFreq=1 for "fox") next to the suggestion "foo" with freq=2,
 * and correctlySpelled reported as true.
 */
@Test
public void testOnlyMorePopularWithExtendedResults() throws Exception {
assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
// Frequency info for the original (queried) token.
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1",
// The suggested word and its frequency in extended-results form.
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'",
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2",
"//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'"
);
}
} }