mirror of https://github.com/apache/lucene.git
SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter query results
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d74968d925
commit
ade2f7e077
|
@ -225,6 +225,9 @@ New Features
|
|||
|
||||
* SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
|
||||
|
||||
* SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter
|
||||
query results (Markus Jelsma via James Dyer)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
||||
|
@ -53,11 +54,17 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.params.SpellingParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QParserPlugin;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.SolrCache;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
|
||||
|
@ -160,7 +167,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
||||
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
||||
int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
|
||||
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
|
||||
//If specified, this can be a discrete # of results, or a percentage of fq results.
|
||||
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
|
||||
|
||||
ModifiableSolrParams customParams = new ModifiableSolrParams();
|
||||
for (String checkerName : getDictionaryNames(params)) {
|
||||
customParams.add(getCustomParams(checkerName, params));
|
||||
|
@ -173,6 +182,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
} else {
|
||||
hits = hitsInteger.longValue();
|
||||
}
|
||||
|
||||
SpellingResult spellingResult = null;
|
||||
if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
|
||||
SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
||||
|
@ -215,6 +225,59 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
}
|
||||
}
|
||||
|
||||
private Integer maxResultsForSuggest(ResponseBuilder rb) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
float maxResultsForSuggestParamValue = params.getFloat(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, 0.0f);
|
||||
Integer maxResultsForSuggest = null;
|
||||
|
||||
if (maxResultsForSuggestParamValue > 0.0f) {
|
||||
if (maxResultsForSuggestParamValue == (int) maxResultsForSuggestParamValue) {
|
||||
// If a whole number was passed in, this is a discrete number of documents
|
||||
maxResultsForSuggest = (int) maxResultsForSuggestParamValue;
|
||||
} else {
|
||||
// If a fractional value was passed in, this is the % of documents returned by the specified filter
|
||||
// If no specified filter, we use the most restrictive filter of the fq parameters
|
||||
String maxResultsFilterQueryString = params.get(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ);
|
||||
|
||||
int maxResultsByFilters = Integer.MAX_VALUE;
|
||||
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||
|
||||
try {
|
||||
if (maxResultsFilterQueryString != null) {
|
||||
// Get the default Lucene query parser
|
||||
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
|
||||
DocSet s = searcher.getDocSet(parser.getQuery());
|
||||
maxResultsByFilters = s.size();
|
||||
} else {
|
||||
List<Query> filters = rb.getFilters();
|
||||
|
||||
// Get the maximum possible hits within these filters (size of most restrictive filter).
|
||||
if (filters != null) {
|
||||
for (Query query : filters) {
|
||||
DocSet s = searcher.getDocSet(query);
|
||||
if (s != null) {
|
||||
maxResultsByFilters = Math.min(s.size(), maxResultsByFilters);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e){
|
||||
LOG.error(e.toString());
|
||||
return null;
|
||||
} catch (SyntaxError e) {
|
||||
LOG.error(e.toString());
|
||||
return null;
|
||||
}
|
||||
|
||||
// Recalculate maxResultsForSuggest if filters were specified
|
||||
if (maxResultsByFilters != Integer.MAX_VALUE) {
|
||||
maxResultsForSuggest = Math.round(maxResultsByFilters * maxResultsForSuggestParamValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
return maxResultsForSuggest;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
|
||||
NamedList response, boolean suggestionsMayOverlap) {
|
||||
|
@ -319,7 +382,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
||||
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
|
||||
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
|
||||
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
|
||||
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
|
||||
|
||||
|
|
|
@ -173,6 +173,10 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
|
|||
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
|
||||
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
|
||||
altTermCount, "5", maxResults, "10"));
|
||||
query(buildRequest("lowerfilt:(\"rod fix\")",
|
||||
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
|
||||
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
|
||||
altTermCount, "5", maxResults, ".10", "fq", "id:[13 TO 22]"));
|
||||
|
||||
//Test word-break spellchecker
|
||||
query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)",
|
||||
|
|
|
@ -61,11 +61,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
assertU((adoc("id", "2", "lowerfilt", "This is a document")));
|
||||
assertU((adoc("id", "3", "lowerfilt", "another document")));
|
||||
//bunch of docs that are variants on blue
|
||||
assertU((adoc("id", "4", "lowerfilt", "blue")));
|
||||
assertU((adoc("id", "5", "lowerfilt", "blud")));
|
||||
assertU((adoc("id", "6", "lowerfilt", "boue")));
|
||||
assertU((adoc("id", "7", "lowerfilt", "glue")));
|
||||
assertU((adoc("id", "8", "lowerfilt", "blee")));
|
||||
assertU((adoc("id", "4", "lowerfilt", "this blue")));
|
||||
assertU((adoc("id", "5", "lowerfilt", "this blud")));
|
||||
assertU((adoc("id", "6", "lowerfilt", "this boue")));
|
||||
assertU((adoc("id", "7", "lowerfilt", "this glue")));
|
||||
assertU((adoc("id", "8", "lowerfilt", "this blee")));
|
||||
assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
|
||||
assertU((commit()));
|
||||
}
|
||||
|
@ -79,6 +79,58 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaximumResultsForSuggest() throws Exception {
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
|
||||
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
try {
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
fail("there should have been no suggestions (6<7)");
|
||||
} catch(Exception e) {
|
||||
//correctly threw exception
|
||||
}
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
|
||||
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
try {
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
fail("there should have been no suggestions ((.8 * 8)<7)");
|
||||
} catch(Exception e) {
|
||||
//correctly threw exception
|
||||
}
|
||||
|
||||
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0 TO 9]",
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
|
||||
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
try {
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
|
||||
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
|
||||
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||
);
|
||||
fail("there should have been no suggestions ((.64 * 10)<7)");
|
||||
} catch(Exception e) {
|
||||
//correctly threw exception
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtendedResultsCount() throws Exception {
|
||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")
|
||||
|
|
|
@ -52,8 +52,10 @@ public interface SpellingParams {
|
|||
/**
|
||||
* <p>
|
||||
* The maximum number of hits the request can return in order to both
|
||||
* generate spelling suggestions and set the "correctlySpelled" element to "false".
|
||||
* Note that this parameter is typically of use only in conjunction with "spellcheck.alternativeTermCount".
|
||||
* generate spelling suggestions and set the "correctlySpelled" element to "false". This can be specified
|
||||
* either as a whole number number of documents, or it can be expressed as a fractional percentage
|
||||
* of documents returned by a chosen filter query. By default, the chosen filter is the most restrictive
|
||||
* fq clause. This can be overridden with {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ} .
|
||||
* </p>
|
||||
* <p>
|
||||
* If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions
|
||||
|
@ -66,6 +68,14 @@ public interface SpellingParams {
|
|||
*/
|
||||
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
|
||||
|
||||
/**
|
||||
*<p>
|
||||
* To be used when {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST} is expressed as a fractional percentage.
|
||||
* Specify a filter query whose result count is used to determine the maximum number of documents.
|
||||
*</p>
|
||||
*/
|
||||
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ = SPELLCHECK_MAX_RESULTS_FOR_SUGGEST + ".fq";
|
||||
|
||||
/**
|
||||
* When this parameter is set to true and the misspelled word exists in the
|
||||
* user field, only words that occur more frequently in the Solr field than
|
||||
|
|
Loading…
Reference in New Issue