SOLR-2576: DirectSolrSpellChecker was not returning freq info for the original token

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1133187 13f79535-47bb-0310-9956-ffa450edef68
2011-06-07 22:48:41 +00:00 · 2011-06-07 22:48:41 +00:00 · 5a1e1bf4c8
parent aa92c15202
commit 5a1e1bf4c8
8 changed files with 47 additions and 14 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -94,8 +94,8 @@ New Features
 * SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
  (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)

-* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
-  to retrieve correction candidates directly from the term dictionary using
+* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
+  DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
  levenshtein automata.  (James Dyer, rmuir)
   
 * SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
@ -256,6 +256,9 @@ Other Changes
  HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
  suggester APIs and implementations. (rmuir)

+* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
+  (James Dyer via rmuir)
+
 Documentation
 ----------------------

--- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@ -435,7 +435,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar

      if (extendedResults) {
        Integer o = origVsFreq.get(original);
-        if (o != null) result.add(token, o);
+        if (o != null) result.addFrequency(token, o);
        for (SuggestWord word : suggestions)
          result.add(token, word.string, word.freq);
      } else {
--- a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@ -182,7 +182,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {

      if (options.extendedResults == true && reader != null && field != null) {
        term = term.createTerm(tokenText);
-        result.add(token, reader.docFreq(term));
+        result.addFrequency(token, reader.docFreq(term));
        int countLimit = Math.min(options.count, suggestions.length);
        if(countLimit>0)
        {
--- a/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
@ -27,6 +27,7 @@ import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
 import org.apache.lucene.search.spell.SuggestWordQueue;
+import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
@ -179,11 +180,14 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
    float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
    
    for (Token token : options.tokens) {
-      SuggestWord[] suggestions = checker.suggestSimilar(new Term(field, token.toString()), 
+    	Term term = new Term(field, token.toString());
+      SuggestWord[] suggestions = checker.suggestSimilar(term, 
          options.count, options.reader, options.onlyMorePopular, accuracy);
-      for (SuggestWord suggestion : suggestions)
+      result.addFrequency(token, options.reader.docFreq(term));
+      for (SuggestWord suggestion : suggestions) {
        result.add(token, suggestion.string, suggestion.freq);      	
      }
+    }
    return result;
  }
 }
--- a/solr/src/java/org/apache/solr/spelling/SpellingResult.java
+++ b/solr/src/java/org/apache/solr/spelling/SpellingResult.java
@ -66,7 +66,13 @@ public class SpellingResult {
    }
  }

-  public void add(Token token, int docFreq) {
+  /**
+   * Adds an original token with its document frequency
+   * 
+   * @param token original token
+   * @param docFreq original token's document frequency
+   */
+  public void addFrequency(Token token, int docFreq) {
    if (tokenFrequency == null) {
      tokenFrequency = new LinkedHashMap<Token, Integer>();
    }
--- a/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
+++ b/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
@ -49,6 +49,12 @@ Config for testing spellcheck component
      <str name="spellcheckIndexDir">spellchecker1</str>
      <str name="buildOnCommit">true</str>
    </lst>
+    <lst name="spellchecker">
+      <str name="name">direct</str>
+      <str name="classname">solr.DirectSolrSpellChecker</str>
+      <int name="minQueryLength">3</int>
+      <str name="field">teststop</str>
+    </lst>
    <lst name="spellchecker">
      <str name="name">threshold</str>
      <str name="field">lowerfilt</str>
--- a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
+++ b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
@ -198,16 +198,15 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
    public void testThresholdTokenFrequency() throws Exception {
    	
  	  	//"document" is in 2 documents but "another" is only in 1.  
-  	  	//So with a threshold of 15%, "another" is absent from the dictionary 
+  	  	//So with a threshold of 29%, "another" is absent from the dictionary 
  	  	//while "document" is present.
    	
  	  	assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
  	        ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
  	    );
  	  	
-  	  	//TODO:  DirectSolrSpellChecker returns a different format.  Is this OK?  Does SOLRJ need tweaking to handle this???
  	  	assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
-  	        ,"/spellcheck/suggestions/[1]/suggestion==['document']]"
+  	        ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
  	    );
  	  	
  	  	//TODO:  how do we make this into a 1-liner using "assertQ()" ???
@ -246,7 +245,6 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
  			suggestions = (NamedList) spellCheck.get("suggestions");
  			assertTrue(suggestions.get("suggestion")==null);
  			
-  			//TODO: Why is DirectSolrSpellChecker returning "true" here?  Is that OK?
-  			//assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
+  			assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
    }
 }
--- a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
+++ b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
@ -22,10 +22,14 @@ import java.util.Map;

 import org.apache.lucene.analysis.Token;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.SpellCheckComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.TestHarness;
 import org.junit.BeforeClass;
 import org.junit.Test;

@ -38,12 +42,13 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
-    initCore("solrconfig.xml","schema.xml");
+    initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
    //Index something with a title
    assertNull(h.validateUpdate(adoc("id", "0", "teststop", "This is a title")));
    assertNull(h.validateUpdate(adoc("id", "1", "teststop", "The quick reb fox jumped over the lazy brown dogs.")));
    assertNull(h.validateUpdate(adoc("id", "2", "teststop", "This is a Solr")));
    assertNull(h.validateUpdate(adoc("id", "3", "teststop", "solr foo")));
+    assertNull(h.validateUpdate(adoc("id", "4", "teststop", "another foo")));
    assertNull(h.validateUpdate(commit()));
    queryConverter = new SimpleQueryConverter();
    queryConverter.init(new NamedList());
@ -77,4 +82,15 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
    assertTrue("suggestions is not null and it should be", suggestions == null);
    searcher.decref();
  }
+  
+  @Test
+  public void testOnlyMorePopularWithExtendedResults() throws Exception {
+  	assertQ(req("q", "teststop:fox", "qt", "spellCheckCompRH", SpellCheckComponent.COMPONENT_NAME, "true", SpellCheckComponent.SPELLCHECK_DICT, "direct", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/int[@name='origFreq']=1",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/str[@name='word']='foo'",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='fox']/arr[@name='suggestion']/lst/int[@name='freq']=2",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/bool[@name='correctlySpelled']='true'"
+  	);
+  }  
+  
 }