mirror of https://github.com/apache/lucene.git
SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa92c15202
commit
5a1e1bf4c8
|
@ -94,8 +94,8 @@ New Features
|
||||||
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
|
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
|
||||||
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
|
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
|
||||||
|
|
||||||
* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
|
* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
|
||||||
to retrieve correction candidates directly from the term dictionary using
|
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
|
||||||
Levenshtein automata. (James Dyer, rmuir)
|
Levenshtein automata. (James Dyer, rmuir)
|
||||||
|
|
||||||
* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
|
* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
|
||||||
|
@ -256,6 +256,9 @@ Other Changes
|
||||||
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
|
HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
|
||||||
suggester APIs and implementations. (rmuir)
|
suggester APIs and implementations. (rmuir)
|
||||||
|
|
||||||
|
* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
|
||||||
|
(James Dyer via rmuir)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
|
|
||||||
if (extendedResults) {
|
if (extendedResults) {
|
||||||
Integer o = origVsFreq.get(original);
|
Integer o = origVsFreq.get(original);
|
||||||
if (o != null) result.add(token, o);
|
if (o != null) result.addFrequency(token, o);
|
||||||
for (SuggestWord word : suggestions)
|
for (SuggestWord word : suggestions)
|
||||||
result.add(token, word.string, word.freq);
|
result.add(token, word.string, word.freq);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
||||||
|
|
||||||
if (options.extendedResults == true && reader != null && field != null) {
|
if (options.extendedResults == true && reader != null && field != null) {
|
||||||
term = term.createTerm(tokenText);
|
term = term.createTerm(tokenText);
|
||||||
result.add(token, reader.docFreq(term));
|
result.addFrequency(token, reader.docFreq(term));
|
||||||
int countLimit = Math.min(options.count, suggestions.length);
|
int countLimit = Math.min(options.count, suggestions.length);
|
||||||
if(countLimit>0)
|
if(countLimit>0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance;
|
||||||
import org.apache.lucene.search.spell.SuggestWord;
|
import org.apache.lucene.search.spell.SuggestWord;
|
||||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||||
|
import org.apache.solr.common.params.SpellingParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
@ -179,10 +180,13 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
|
||||||
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
|
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
|
||||||
|
|
||||||
for (Token token : options.tokens) {
|
for (Token token : options.tokens) {
|
||||||
SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()),
|
Term term = new Term(field, token.toString());
|
||||||
|
SuggestWord[] suggestions = checker.suggestSimilar(term,
|
||||||
options.count, options.reader, options.onlyMorePopular, accuracy);
|
options.count, options.reader, options.onlyMorePopular, accuracy);
|
||||||
for (SuggestWord suggestion : suggestions)
|
result.addFrequency(token, options.reader.docFreq(term));
|
||||||
result.add(token, suggestion.string, suggestion.freq);
|
for (SuggestWord suggestion : suggestions) {
|
||||||
|
result.add(token, suggestion.string, suggestion.freq);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,13 @@ public class SpellingResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(Token token, int docFreq) {
|
/**
|
||||||
|
* Adds an original token with its document frequency
|
||||||
|
*
|
||||||
|
* @param token original token
|
||||||
|
* @param docFreq original token's document frequency
|
||||||
|
*/
|
||||||
|
public void addFrequency(Token token, int docFreq) {
|
||||||
if (tokenFrequency == null) {
|
if (tokenFrequency == null) {
|
||||||
tokenFrequency = new LinkedHashMap<Token, Integer>();
|
tokenFrequency = new LinkedHashMap<Token, Integer>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,12 @@ Config for testing spellcheck component
|
||||||
<str name="spellcheckIndexDir">spellchecker1</str>
|
<str name="spellcheckIndexDir">spellchecker1</str>
|
||||||
<str name="buildOnCommit">true</str>
|
<str name="buildOnCommit">true</str>
|
||||||
</lst>
|
</lst>
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">direct</str>
|
||||||
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
|
<int name="minQueryLength">3</int>
|
||||||
|
<str name="field">teststop</str>
|
||||||
|
</lst>
|
||||||
<lst name="spellchecker">
|
<lst name="spellchecker">
|
||||||
<str name="name">threshold</str>
|
<str name="name">threshold</str>
|
||||||
<str name="field">lowerfilt</str>
|
<str name="field">lowerfilt</str>
|
||||||
|
|
|
@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
public void testThresholdTokenFrequency() throws Exception {
|
public void testThresholdTokenFrequency() throws Exception {
|
||||||
|
|
||||||
//"document" is in 2 documents but "another" is only in 1.
|
//"document" is in 2 documents but "another" is only in 1.
|
||||||
//So with a threshold of 15%, "another" is absent from the dictionary
|
//So with a threshold of 29%, "another" is absent from the dictionary
|
||||||
//while "document" is present.
|
//while "document" is present.
|
||||||
|
|
||||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
||||||
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
|
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
|
||||||
);
|
);
|
||||||
|
|
||||||
//TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this???
|
|
||||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
|
||||||
,"/spellcheck/suggestions/[1]/suggestion==['document']]"
|
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
|
||||||
);
|
);
|
||||||
|
|
||||||
//TODO: how do we make this into a 1-liner using "assertQ()" ???
|
//TODO: how do we make this into a 1-liner using "assertQ()" ???
|
||||||
|
@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
suggestions = (NamedList) spellCheck.get("suggestions");
|
suggestions = (NamedList) spellCheck.get("suggestions");
|
||||||
assertTrue(suggestions.get("suggestion")==null);
|
assertTrue(suggestions.get("suggestion")==null);
|
||||||
|
|
||||||
//TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK?
|
assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
|
||||||
//assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,10 +22,14 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.handler.component.SpellCheckComponent;
|
||||||
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
import org.apache.solr.util.RefCounted;
|
import org.apache.solr.util.RefCounted;
|
||||||
|
import org.apache.solr.util.TestHarness;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() throws Exception {
|
public static void beforeClass() throws Exception {
|
||||||
initCore("solrconfig.xml","schema.xml");
|
initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
|
||||||
//Index something with a title
|
//Index something with a title
|
||||||
assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
|
assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
|
||||||
assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
|
assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
|
||||||
assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
|
assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
|
||||||
assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
|
assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
|
||||||
|
assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo")));
|
||||||
assertNull(h.validateUpdate(commit()));
|
assertNull(h.validateUpdate(commit()));
|
||||||
queryConverter = new SimpleQueryConverter();
|
queryConverter = new SimpleQueryConverter();
|
||||||
queryConverter.init(new NamedList());
|
queryConverter.init(new NamedList());
|
||||||
|
@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
|
||||||
assertTrue("suggestions is not null and it should be", suggestions == null);
|
assertTrue("suggestions is not null and it should be", suggestions == null);
|
||||||
searcher.decref();
|
searcher.decref();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOnlyMorePopularWithExtendedResults() throws Exception {
|
||||||
|
assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2",
|
||||||
|
"//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue