diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 97a6b5a1183..8833db90edd 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -94,9 +94,9 @@ New Features
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
-* LUCENE-2507: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
+* LUCENE-2507, SOLR-2571: Added DirectSolrSpellChecker, which uses Lucene's DirectSpellChecker
to retrieve correction candidates directly from the term dictionary using
- levenshtein automata. (rmuir)
+ levenshtein automata. (James Dyer, rmuir)
* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller)
@@ -282,6 +282,13 @@ Bug Fixes
parameter is added to avoid excessive CPU time in extreme cases (e.g. long
queries with many misspelled words). (James Dyer via rmuir)
+Other Changes
+----------------------
+
+* SOLR-2571: Add a commented out example of the spellchecker's thresholdTokenFrequency
+ parameter to the example solrconfig.xml, and also add a unit test for this feature.
+ (James Dyer via rmuir)
+
================== 3.2.0 ==================
Versions of Major Components
---------------------
diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml
index 4bbc9aa1b14..b1d30b4af86 100755
--- a/solr/example/solr/conf/solrconfig.xml
+++ b/solr/example/solr/conf/solrconfig.xml
@@ -1084,6 +1084,9 @@
default
name
spellchecker
+
diff --git a/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
index f3b9d52bad8..38cc71e6ba1 100644
--- a/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
@@ -59,31 +59,29 @@ import org.slf4j.LoggerFactory;
public class DirectSolrSpellChecker extends SolrSpellChecker {
private static final Logger LOG = LoggerFactory.getLogger(DirectSolrSpellChecker.class);
- /** Field to use as the source of terms */
- public static final String FIELD = "field";
+ // configuration params shared with other spellcheckers
+ public static final String COMPARATOR_CLASS = AbstractLuceneSpellChecker.COMPARATOR_CLASS;
+ public static final String SCORE_COMP = AbstractLuceneSpellChecker.SCORE_COMP;
+ public static final String FREQ_COMP = AbstractLuceneSpellChecker.FREQ_COMP;
+ public static final String FIELD = AbstractLuceneSpellChecker.FIELD;
+ public static final String STRING_DISTANCE = AbstractLuceneSpellChecker.STRING_DISTANCE;
+ public static final String ACCURACY = AbstractLuceneSpellChecker.ACCURACY;
+ public static final String THRESHOLD_TOKEN_FREQUENCY = IndexBasedSpellChecker.THRESHOLD_TOKEN_FREQUENCY;
- public static final String STRING_DISTANCE = "distanceMeasure";
public static final String INTERNAL_DISTANCE = "internal";
-
- public static final String ACCURACY = "accuracy";
public static final float DEFAULT_ACCURACY = 0.5f;
-
+ public static final float DEFAULT_THRESHOLD_TOKEN_FREQUENCY = 0.0f;
+
public static final String MAXEDITS = "maxEdits";
public static final int DEFAULT_MAXEDITS = 2;
+ // params specific to this implementation
public static final String MINPREFIX = "minPrefix";
public static final int DEFAULT_MINPREFIX = 1;
public static final String MAXINSPECTIONS = "maxInspections";
public static final int DEFAULT_MAXINSPECTIONS = 5;
- public static final String COMPARATOR_CLASS = "comparatorClass";
- public static final String SCORE_COMP = "score";
- public static final String FREQ_COMP = "freq";
-
- public static final String THRESHOLD = "thresholdTokenFrequency";
- public static final float DEFAULT_THRESHOLD = 0.0f;
-
public static final String MINQUERYLENGTH = "minQueryLength";
public static final int DEFAULT_MINQUERYLENGTH = 4;
@@ -117,39 +115,39 @@ public class DirectSolrSpellChecker extends SolrSpellChecker {
field = (String) config.get(FIELD);
float minAccuracy = DEFAULT_ACCURACY;
- String accuracy = (String) config.get(ACCURACY);
+ Float accuracy = (Float) config.get(ACCURACY);
if (accuracy != null)
- minAccuracy = Float.parseFloat(accuracy);
+ minAccuracy = accuracy;
int maxEdits = DEFAULT_MAXEDITS;
- String edits = (String) config.get(MAXEDITS);
+ Integer edits = (Integer) config.get(MAXEDITS);
if (edits != null)
- maxEdits = Integer.parseInt(edits);
+ maxEdits = edits;
int minPrefix = DEFAULT_MINPREFIX;
- String prefix = (String) config.get(MINPREFIX);
+ Integer prefix = (Integer) config.get(MINPREFIX);
if (prefix != null)
- minPrefix = Integer.parseInt(prefix);
+ minPrefix = prefix;
int maxInspections = DEFAULT_MAXINSPECTIONS;
- String inspections = (String) config.get(MAXINSPECTIONS);
+ Integer inspections = (Integer) config.get(MAXINSPECTIONS);
if (inspections != null)
- maxInspections = Integer.parseInt(inspections);
+ maxInspections = inspections;
- float minThreshold = DEFAULT_THRESHOLD;
- String threshold = (String) config.get(THRESHOLD);
+ float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY;
+ Float threshold = (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
if (threshold != null)
- minThreshold = Float.parseFloat(threshold);
+ minThreshold = threshold;
int minQueryLength = DEFAULT_MINQUERYLENGTH;
- String queryLength = (String) config.get(MINQUERYLENGTH);
+ Integer queryLength = (Integer) config.get(MINQUERYLENGTH);
if (queryLength != null)
- minQueryLength = Integer.parseInt(queryLength);
+ minQueryLength = queryLength;
float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY;
- String queryFreq = (String) config.get(MAXQUERYFREQUENCY);
+ Float queryFreq = (Float) config.get(MAXQUERYFREQUENCY);
if (queryFreq != null)
- maxQueryFrequency = Float.parseFloat(queryFreq);
+ maxQueryFrequency = queryFreq;
checker.setComparator(comp);
checker.setDistance(sd);
diff --git a/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml b/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
index 7141abf016b..322c938f194 100644
--- a/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
+++ b/solr/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
@@ -49,6 +49,21 @@ Config for testing spellcheck component
spellchecker1
true
+
+ threshold
+ lowerfilt
+ spellcheckerThreshold
+ true
+ .29
+
+
+ threshold_direct
+ solr.DirectSolrSpellChecker
+ lowerfilt
+ spellcheckerThreshold
+ true
+ .29
+
multipleFields
lowerfilt1and2
diff --git a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
index 30972f8b881..a2754f549ee 100644
--- a/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
+++ b/solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
@@ -21,10 +21,15 @@ import java.io.File;
import java.util.*;
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.junit.BeforeClass;
@@ -188,4 +193,60 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertQ(req, "//arr[@name='suggestion'][.='lucenejava']");
}
+
+ @Test
+ public void testThresholdTokenFrequency() throws Exception {
+
+ //"document" is in 2 documents but "another" is only in 1.
+ //So with a threshold of 15%, "another" is absent from the dictionary
+ //while "document" is present.
+
+ assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
+ ,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
+ );
+
+ //TODO: DirectSolrSpellChecker returns a different format. Is this OK? Does SOLRJ need tweaking to handle this???
+ assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct", SpellCheckComponent.SPELLCHECK_COUNT,"5", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true")
+ ,"/spellcheck/suggestions/[1]/suggestion==['document']]"
+ );
+
+ //TODO: how do we make this into a 1-liner using "assertQ()" ???
+ SolrCore core = h.getCore();
+ SearchComponent speller = core.getSearchComponent("spellcheck");
+ assertTrue("speller is null and it shouldn't be", speller != null);
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(SpellCheckComponent.COMPONENT_NAME, "true");
+ params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
+ params.add(SpellCheckComponent.SPELLCHECK_DICT, "threshold");
+ params.add(SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS,"true");
+ params.add(CommonParams.Q, "anotheq");
+
+ SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
+ handler.handleRequest(req, rsp);
+ req.close();
+ NamedList values = rsp.getValues();
+ NamedList spellCheck = (NamedList) values.get("spellcheck");
+ NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+ assertTrue(suggestions.get("suggestion")==null);
+ assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
+
+ params.remove(SpellCheckComponent.SPELLCHECK_DICT);
+ params.add(SpellCheckComponent.SPELLCHECK_DICT, "threshold_direct");
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ req = new LocalSolrQueryRequest(core, params);
+ handler.handleRequest(req, rsp);
+ req.close();
+ values = rsp.getValues();
+ spellCheck = (NamedList) values.get("spellcheck");
+ suggestions = (NamedList) spellCheck.get("suggestions");
+ assertTrue(suggestions.get("suggestion")==null);
+
+ //TODO: Why is DirectSolrSpellChecker returning "true" here? Is that OK?
+ //assertTrue((Boolean) suggestions.get("correctlySpelled")==false);
+ }
}
diff --git a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
index 7e93afb8358..e96700f7a00 100644
--- a/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
+++ b/solr/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
@@ -55,7 +55,7 @@ public class DirectSolrSpellCheckerTest extends SolrTestCaseJ4 {
NamedList spellchecker = new NamedList();
spellchecker.add("classname", DirectSolrSpellChecker.class.getName());
spellchecker.add(DirectSolrSpellChecker.FIELD, "teststop");
- spellchecker.add(DirectSolrSpellChecker.MINQUERYLENGTH, "2"); // we will try "fob"
+ spellchecker.add(DirectSolrSpellChecker.MINQUERYLENGTH, 2); // we will try "fob"
SolrCore core = h.getCore();
checker.init(spellchecker, core);