From 5a1e1bf4c8dc625a1d84cf3ecbce853f1af50f66 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 7 Jun 2011 22:48:41 +0000 Subject: [PATCH] SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 7 +++++-- .../handler/component/SpellCheckComponent.java | 2 +- .../spelling/AbstractLuceneSpellChecker.java | 2 +- .../solr/spelling/DirectSolrSpellChecker.java | 10 +++++++--- .../apache/solr/spelling/SpellingResult.java | 8 +++++++- .../conf/solrconfig-spellcheckcomponent.xml | 6 ++++++ .../component/SpellCheckComponentTest.java | 8 +++----- .../spelling/DirectSolrSpellCheckerTest.java | 18 +++++++++++++++++- 8 files changed, 47 insertions(+), 14 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b5a8d9f2c7c..1d735f23870 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -94,8 +94,8 @@ New Features * SOLR-792: Adding PivotFacetComponent for Hierarchical faceting (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan) -* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker - to retrieve correction candidates directly from the term dictionary using +* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's + DirectSpellChecker to retrieve correction candidates directly from the term dictionary using levenshtein automata. (James Dyer, rmuir) * SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper, @@ -256,6 +256,9 @@ Other Changes HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the suggester APIs and implementations. (rmuir) +* SOLR-2576: Remove deprecated SpellingResult.add(Token, int). + (James Dyer via rmuir) + Documentation ---------------------- diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 559bbd81f83..fea75dfe110 100644 --- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar if (extendedResults) { Integer o = origVsFreq.get(original); - if (o != null) result.add(token, o); + if (o != null) result.addFrequency(token, o); for (SuggestWord word : suggestions) result.add(token, word.string, word.freq); } else { diff --git a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java index 2a53e0f58f1..9a8713a196e 100644 --- a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java +++ b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java @@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker { if (options.extendedResults == true && reader != null && field != null) { term = term.createTerm(tokenText); - result.add(token, reader.docFreq(term)); + result.addFrequency(token, reader.docFreq(term)); int countLimit = Math.min(options.count, suggestions.length); if(countLimit>0) { diff --git a/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java index 38cc71e6ba1..774e4cc6d8f 100644 --- a/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java +++ b/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordQueue; +import org.apache.solr.common.params.SpellingParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; @@ -179,10 +180,13 @@ public class DirectSolrSpellChecker extends SolrSpellChecker { float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { - SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()), + Term term = new Term(field, token.toString()); + SuggestWord[] suggestions = checker.suggestSimilar(term, options.count, options.reader, options.onlyMorePopular, accuracy); - for (SuggestWord suggestion : suggestions) - result.add(token, suggestion.string, suggestion.freq); + result.addFrequency(token, options.reader.docFreq(term)); + for (SuggestWord suggestion : suggestions) { + result.add(token, suggestion.string, suggestion.freq); + } } return result; } diff --git a/solr/src/java/org/apache/solr/spelling/SpellingResult.java b/solr/src/java/org/apache/solr/spelling/SpellingResult.java index aeeacbd2287..7eb1c6696c3 100644 --- a/solr/src/java/org/apache/solr/spelling/SpellingResult.java +++ b/solr/src/java/org/apache/solr/spelling/SpellingResult.java @@ -66,7 +66,13 @@ public class SpellingResult { } } - public void add(Token token, int docFreq) { + /** + * Adds an original token with its document frequency + * + * @param token original token + * @param docFreq original token's document frequency + */ + public void addFrequency(Token token, int docFreq) { if (tokenFrequency == null) { tokenFrequency = new LinkedHashMap(); } diff --git a/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml b/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml index 322c938f194..34d33673043 100644 --- a/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml +++ b/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml @@ -49,6 +49,12 @@ Config for testing spellcheck component spellchecker1 true + + direct + solr.DirectSolrSpellChecker + 3 + teststop + threshold lowerfilt diff --git a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java index a2754f549ee..eda48e9c7b5 100644 --- a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java +++ b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java @@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 { public void testThresholdTokenFrequency() throws Exception { //"document" is in 2 documents but "another" is only in 1. - //So with a threshold of 15%, "another" is absent from the dictionary + //So with a threshold of 29%, "another" is absent from the dictionary //while "document" is present. assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true") ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]" ); - //TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this??? assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true") - ,"/spellcheck/suggestions/[1]/suggestion==['document']]" + ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]" ); //TODO: how do we make this into a 1-liner using "assertQ()" ??? @@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 { suggestions = (NamedList) spellCheck.get("suggestions"); assertTrue(suggestions.get("suggestion")==null); - //TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK? - //assertTrue((Boolean) suggestions.get("correctlySpelled")==false); + assertTrue((Boolean) suggestions.get("correctlySpelled")==false); } } diff --git a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java index e96700f7a00..dca2c9dc363 100644 --- a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java +++ b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java @@ -22,10 +22,14 @@ import java.util.Map; import org.apache.lucene.analysis.Token; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.component.SpellCheckComponent; +import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.RefCounted; +import org.apache.solr.util.TestHarness; import org.junit.BeforeClass; import org.junit.Test; @@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { - initCore("solrconfig.xml","schema.xml"); + initCore("solrconfig-spellcheckcomponent.xml","schema.xml"); //Index something with a title assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title"))); assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs."))); assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr"))); assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo"))); + assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo"))); assertNull(h.validateUpdate(commit())); queryConverter = new SimpleQueryConverter(); queryConverter.init(new NamedList()); @@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 { assertTrue("suggestions is not null and it should be", suggestions == null); searcher.decref(); } + + @Test + public void testOnlyMorePopularWithExtendedResults() throws Exception { + assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2", + "//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'" + ); + } + }