SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter query results

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
James Dyer 2015-12-17 19:40:24 +00:00
parent d74968d925
commit ade2f7e077
5 changed files with 142 additions and 10 deletions

View File

@ -225,6 +225,9 @@ New Features
* SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble) * SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
* SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter
query results (Markus Jelsma via James Dyer)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.spell.SuggestWord;
import org.apache.solr.client.solrj.response.SpellCheckResponse; import org.apache.solr.client.solrj.response.SpellCheckResponse;
@ -53,11 +54,17 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SpellingParams; import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrEventListener; import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.AbstractLuceneSpellChecker; import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.apache.solr.spelling.ConjunctionSolrSpellChecker; import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
@ -160,7 +167,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean collate = params.getBool(SPELLCHECK_COLLATE, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0); int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); //If specified, this can be a discrete # of results, or a percentage of fq results.
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
ModifiableSolrParams customParams = new ModifiableSolrParams(); ModifiableSolrParams customParams = new ModifiableSolrParams();
for (String checkerName : getDictionaryNames(params)) { for (String checkerName : getDictionaryNames(params)) {
customParams.add(getCustomParams(checkerName, params)); customParams.add(getCustomParams(checkerName, params));
@ -173,6 +182,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
} else { } else {
hits = hitsInteger.longValue(); hits = hitsInteger.longValue();
} }
SpellingResult spellingResult = null; SpellingResult spellingResult = null;
if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
@ -214,7 +224,60 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
} }
} }
} }
private Integer maxResultsForSuggest(ResponseBuilder rb) {
SolrParams params = rb.req.getParams();
float maxResultsForSuggestParamValue = params.getFloat(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, 0.0f);
Integer maxResultsForSuggest = null;
if (maxResultsForSuggestParamValue > 0.0f) {
if (maxResultsForSuggestParamValue == (int) maxResultsForSuggestParamValue) {
// If a whole number was passed in, this is a discrete number of documents
maxResultsForSuggest = (int) maxResultsForSuggestParamValue;
} else {
// If a fractional value was passed in, this is the % of documents returned by the specified filter
// If no specified filter, we use the most restrictive filter of the fq parameters
String maxResultsFilterQueryString = params.get(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ);
int maxResultsByFilters = Integer.MAX_VALUE;
SolrIndexSearcher searcher = rb.req.getSearcher();
try {
if (maxResultsFilterQueryString != null) {
// Get the default Lucene query parser
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
DocSet s = searcher.getDocSet(parser.getQuery());
maxResultsByFilters = s.size();
} else {
List<Query> filters = rb.getFilters();
// Get the maximum possible hits within these filters (size of most restrictive filter).
if (filters != null) {
for (Query query : filters) {
DocSet s = searcher.getDocSet(query);
if (s != null) {
maxResultsByFilters = Math.min(s.size(), maxResultsByFilters);
}
}
}
}
} catch (IOException e){
LOG.error(e.toString());
return null;
} catch (SyntaxError e) {
LOG.error(e.toString());
return null;
}
// Recalculate maxResultsForSuggest if filters were specified
if (maxResultsByFilters != Integer.MAX_VALUE) {
maxResultsForSuggest = Math.round(maxResultsByFilters * maxResultsForSuggestParamValue);
}
}
}
return maxResultsForSuggest;
}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q, protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
NamedList response, boolean suggestionsMayOverlap) { NamedList response, boolean suggestionsMayOverlap) {
@ -319,7 +382,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false); boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0); int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1); int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); Integer maxResultsForSuggest = maxResultsForSuggest(rb);
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1); int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT); int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
@ -330,7 +393,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
origQuery = params.get(CommonParams.Q); origQuery = params.get(CommonParams.Q);
} }
} }
long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound(); long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound();
boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest); boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);

View File

@ -173,6 +173,10 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10", false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false", collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
altTermCount, "5", maxResults, "10")); altTermCount, "5", maxResults, "10"));
query(buildRequest("lowerfilt:(\"rod fix\")",
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
altTermCount, "5", maxResults, ".10", "fq", "id:[13 TO 22]"));
//Test word-break spellchecker //Test word-break spellchecker
query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)", query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)",

View File

@ -61,11 +61,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
assertU((adoc("id", "2", "lowerfilt", "This is a document"))); assertU((adoc("id", "2", "lowerfilt", "This is a document")));
assertU((adoc("id", "3", "lowerfilt", "another document"))); assertU((adoc("id", "3", "lowerfilt", "another document")));
//bunch of docs that are variants on blue //bunch of docs that are variants on blue
assertU((adoc("id", "4", "lowerfilt", "blue"))); assertU((adoc("id", "4", "lowerfilt", "this blue")));
assertU((adoc("id", "5", "lowerfilt", "blud"))); assertU((adoc("id", "5", "lowerfilt", "this blud")));
assertU((adoc("id", "6", "lowerfilt", "boue"))); assertU((adoc("id", "6", "lowerfilt", "this boue")));
assertU((adoc("id", "7", "lowerfilt", "glue"))); assertU((adoc("id", "7", "lowerfilt", "this glue")));
assertU((adoc("id", "8", "lowerfilt", "blee"))); assertU((adoc("id", "8", "lowerfilt", "this blee")));
assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345"))); assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
assertU((commit())); assertU((commit()));
} }
@ -79,6 +79,58 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
} }
@Test
public void testMaximumResultsForSuggest() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions (6<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.8 * 8)<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0 TO 9]",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.64 * 10)<7)");
} catch(Exception e) {
//correctly threw exception
}
}
@Test @Test
public void testExtendedResultsCount() throws Exception { public void testExtendedResultsCount() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false") assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")

View File

@ -52,8 +52,10 @@ public interface SpellingParams {
/** /**
* <p> * <p>
* The maximum number of hits the request can return in order to both * The maximum number of hits the request can return in order to both
* generate spelling suggestions and set the "correctlySpelled" element to "false". * generate spelling suggestions and set the "correctlySpelled" element to "false". This can be specified
* Note that this parameter is typically of use only in conjunction with "spellcheck.alternativeTermCount". * either as a whole number number of documents, or it can be expressed as a fractional percentage
* of documents returned by a chosen filter query. By default, the chosen filter is the most restrictive
* fq clause. This can be overridden with {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ} .
* </p> * </p>
* <p> * <p>
* If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions * If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions
@ -66,6 +68,14 @@ public interface SpellingParams {
*/ */
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest"; public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
/**
*<p>
* To be used when {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST} is expressed as a fractional percentage.
* Specify a filter query whose result count is used to determine the maximum number of documents.
*</p>
*/
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ = SPELLCHECK_MAX_RESULTS_FOR_SUGGEST + ".fq";
/** /**
* When this parameter is set to true and the misspelled word exists in the * When this parameter is set to true and the misspelled word exists in the
* user field, only words that occur more frequently in the Solr field than * user field, only words that occur more frequently in the Solr field than