SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter query results

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
James Dyer 2015-12-17 19:40:24 +00:00
parent d74968d925
commit ade2f7e077
5 changed files with 142 additions and 10 deletions

View File

@ -225,6 +225,9 @@ New Features
* SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
* SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter
query results (Markus Jelsma via James Dyer)
Bug Fixes
----------------------

View File

@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
@ -53,11 +54,17 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
@ -160,7 +167,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
//If specified, this can be a discrete # of results, or a percentage of fq results.
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
ModifiableSolrParams customParams = new ModifiableSolrParams();
for (String checkerName : getDictionaryNames(params)) {
customParams.add(getCustomParams(checkerName, params));
@ -173,6 +182,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
} else {
hits = hitsInteger.longValue();
}
SpellingResult spellingResult = null;
if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
@ -214,7 +224,60 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
}
}
}
private Integer maxResultsForSuggest(ResponseBuilder rb) {
SolrParams params = rb.req.getParams();
float maxResultsForSuggestParamValue = params.getFloat(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, 0.0f);
Integer maxResultsForSuggest = null;
if (maxResultsForSuggestParamValue > 0.0f) {
if (maxResultsForSuggestParamValue == (int) maxResultsForSuggestParamValue) {
// If a whole number was passed in, this is a discrete number of documents
maxResultsForSuggest = (int) maxResultsForSuggestParamValue;
} else {
// If a fractional value was passed in, this is the % of documents returned by the specified filter
// If no specified filter, we use the most restrictive filter of the fq parameters
String maxResultsFilterQueryString = params.get(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ);
int maxResultsByFilters = Integer.MAX_VALUE;
SolrIndexSearcher searcher = rb.req.getSearcher();
try {
if (maxResultsFilterQueryString != null) {
// Get the default Lucene query parser
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
DocSet s = searcher.getDocSet(parser.getQuery());
maxResultsByFilters = s.size();
} else {
List<Query> filters = rb.getFilters();
// Get the maximum possible hits within these filters (size of most restrictive filter).
if (filters != null) {
for (Query query : filters) {
DocSet s = searcher.getDocSet(query);
if (s != null) {
maxResultsByFilters = Math.min(s.size(), maxResultsByFilters);
}
}
}
}
} catch (IOException e){
LOG.error(e.toString());
return null;
} catch (SyntaxError e) {
LOG.error(e.toString());
return null;
}
// Recalculate maxResultsForSuggest if filters were specified
if (maxResultsByFilters != Integer.MAX_VALUE) {
maxResultsForSuggest = Math.round(maxResultsByFilters * maxResultsForSuggestParamValue);
}
}
}
return maxResultsForSuggest;
}
@SuppressWarnings("unchecked")
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
NamedList response, boolean suggestionsMayOverlap) {
@ -319,7 +382,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
@ -330,7 +393,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
origQuery = params.get(CommonParams.Q);
}
}
long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound();
boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);

View File

@ -173,6 +173,10 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
altTermCount, "5", maxResults, "10"));
query(buildRequest("lowerfilt:(\"rod fix\")",
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
altTermCount, "5", maxResults, ".10", "fq", "id:[13 TO 22]"));
//Test word-break spellchecker
query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)",

View File

@ -61,11 +61,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertU((adoc("id", "2", "lowerfilt", "This is a document")));
assertU((adoc("id", "3", "lowerfilt", "another document")));
//bunch of docs that are variants on blue
assertU((adoc("id", "4", "lowerfilt", "blue")));
assertU((adoc("id", "5", "lowerfilt", "blud")));
assertU((adoc("id", "6", "lowerfilt", "boue")));
assertU((adoc("id", "7", "lowerfilt", "glue")));
assertU((adoc("id", "8", "lowerfilt", "blee")));
assertU((adoc("id", "4", "lowerfilt", "this blue")));
assertU((adoc("id", "5", "lowerfilt", "this blud")));
assertU((adoc("id", "6", "lowerfilt", "this boue")));
assertU((adoc("id", "7", "lowerfilt", "this glue")));
assertU((adoc("id", "8", "lowerfilt", "this blee")));
assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
assertU((commit()));
}
@ -79,6 +79,58 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
}
@Test
public void testMaximumResultsForSuggest() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions (6<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.8 * 8)<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0 TO 9]",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.64 * 10)<7)");
} catch(Exception e) {
//correctly threw exception
}
}
@Test
public void testExtendedResultsCount() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")

View File

@ -52,8 +52,10 @@ public interface SpellingParams {
/**
* <p>
* The maximum number of hits the request can return in order to both
* generate spelling suggestions and set the "correctlySpelled" element to "false".
* Note that this parameter is typically of use only in conjunction with "spellcheck.alternativeTermCount".
* generate spelling suggestions and set the "correctlySpelled" element to "false". This can be specified
* either as a whole number number of documents, or it can be expressed as a fractional percentage
* of documents returned by a chosen filter query. By default, the chosen filter is the most restrictive
* fq clause. This can be overridden with {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ} .
* </p>
* <p>
* If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions
@ -66,6 +68,14 @@ public interface SpellingParams {
*/
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
/**
*<p>
* To be used when {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST} is expressed as a fractional percentage.
* Specify a filter query whose result count is used to determine the maximum number of documents.
*</p>
*/
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ = SPELLCHECK_MAX_RESULTS_FOR_SUGGEST + ".fq";
/**
* When this parameter is set to true and the misspelled word exists in the
* user field, only words that occur more frequently in the Solr field than