mirror of https://github.com/apache/lucene.git
SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter query results
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d74968d925
commit
ade2f7e077
|
@ -225,6 +225,9 @@ New Features
|
||||||
|
|
||||||
* SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
|
* SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
|
||||||
|
|
||||||
|
* SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter
|
||||||
|
query results (Markus Jelsma via James Dyer)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.spell.SuggestMode;
|
import org.apache.lucene.search.spell.SuggestMode;
|
||||||
import org.apache.lucene.search.spell.SuggestWord;
|
import org.apache.lucene.search.spell.SuggestWord;
|
||||||
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
import org.apache.solr.client.solrj.response.SpellCheckResponse;
|
||||||
|
@ -53,11 +54,17 @@ import org.apache.solr.common.params.SolrParams;
|
||||||
import org.apache.solr.common.params.SpellingParams;
|
import org.apache.solr.common.params.SpellingParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.core.SolrEventListener;
|
import org.apache.solr.core.SolrEventListener;
|
||||||
import org.apache.solr.core.SolrResourceLoader;
|
import org.apache.solr.core.SolrResourceLoader;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.search.DocSet;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.search.QParserPlugin;
|
||||||
|
import org.apache.solr.search.SyntaxError;
|
||||||
|
import org.apache.solr.search.SolrCache;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
|
||||||
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
|
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
|
||||||
|
@ -160,7 +167,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
|
||||||
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
|
||||||
int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
|
int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
|
||||||
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
|
//If specified, this can be a discrete # of results, or a percentage of fq results.
|
||||||
|
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
|
||||||
|
|
||||||
ModifiableSolrParams customParams = new ModifiableSolrParams();
|
ModifiableSolrParams customParams = new ModifiableSolrParams();
|
||||||
for (String checkerName : getDictionaryNames(params)) {
|
for (String checkerName : getDictionaryNames(params)) {
|
||||||
customParams.add(getCustomParams(checkerName, params));
|
customParams.add(getCustomParams(checkerName, params));
|
||||||
|
@ -173,6 +182,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
} else {
|
} else {
|
||||||
hits = hitsInteger.longValue();
|
hits = hitsInteger.longValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
SpellingResult spellingResult = null;
|
SpellingResult spellingResult = null;
|
||||||
if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
|
if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
|
||||||
SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
|
||||||
|
@ -214,7 +224,60 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Integer maxResultsForSuggest(ResponseBuilder rb) {
|
||||||
|
SolrParams params = rb.req.getParams();
|
||||||
|
float maxResultsForSuggestParamValue = params.getFloat(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, 0.0f);
|
||||||
|
Integer maxResultsForSuggest = null;
|
||||||
|
|
||||||
|
if (maxResultsForSuggestParamValue > 0.0f) {
|
||||||
|
if (maxResultsForSuggestParamValue == (int) maxResultsForSuggestParamValue) {
|
||||||
|
// If a whole number was passed in, this is a discrete number of documents
|
||||||
|
maxResultsForSuggest = (int) maxResultsForSuggestParamValue;
|
||||||
|
} else {
|
||||||
|
// If a fractional value was passed in, this is the % of documents returned by the specified filter
|
||||||
|
// If no specified filter, we use the most restrictive filter of the fq parameters
|
||||||
|
String maxResultsFilterQueryString = params.get(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ);
|
||||||
|
|
||||||
|
int maxResultsByFilters = Integer.MAX_VALUE;
|
||||||
|
SolrIndexSearcher searcher = rb.req.getSearcher();
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (maxResultsFilterQueryString != null) {
|
||||||
|
// Get the default Lucene query parser
|
||||||
|
QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE, rb.req);
|
||||||
|
DocSet s = searcher.getDocSet(parser.getQuery());
|
||||||
|
maxResultsByFilters = s.size();
|
||||||
|
} else {
|
||||||
|
List<Query> filters = rb.getFilters();
|
||||||
|
|
||||||
|
// Get the maximum possible hits within these filters (size of most restrictive filter).
|
||||||
|
if (filters != null) {
|
||||||
|
for (Query query : filters) {
|
||||||
|
DocSet s = searcher.getDocSet(query);
|
||||||
|
if (s != null) {
|
||||||
|
maxResultsByFilters = Math.min(s.size(), maxResultsByFilters);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e){
|
||||||
|
LOG.error(e.toString());
|
||||||
|
return null;
|
||||||
|
} catch (SyntaxError e) {
|
||||||
|
LOG.error(e.toString());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recalculate maxResultsForSuggest if filters were specified
|
||||||
|
if (maxResultsByFilters != Integer.MAX_VALUE) {
|
||||||
|
maxResultsForSuggest = Math.round(maxResultsByFilters * maxResultsForSuggestParamValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return maxResultsForSuggest;
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
|
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
|
||||||
NamedList response, boolean suggestionsMayOverlap) {
|
NamedList response, boolean suggestionsMayOverlap) {
|
||||||
|
@ -319,7 +382,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||||
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
|
||||||
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
|
Integer maxResultsForSuggest = maxResultsForSuggest(rb);
|
||||||
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
|
int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
|
||||||
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
|
int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
|
||||||
|
|
||||||
|
@ -330,7 +393,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
origQuery = params.get(CommonParams.Q);
|
origQuery = params.get(CommonParams.Q);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound();
|
long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound();
|
||||||
boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
|
boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
|
||||||
|
|
||||||
|
|
|
@ -173,6 +173,10 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
|
||||||
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
|
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
|
||||||
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
|
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
|
||||||
altTermCount, "5", maxResults, "10"));
|
altTermCount, "5", maxResults, "10"));
|
||||||
|
query(buildRequest("lowerfilt:(\"rod fix\")",
|
||||||
|
false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",
|
||||||
|
collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
|
||||||
|
altTermCount, "5", maxResults, ".10", "fq", "id:[13 TO 22]"));
|
||||||
|
|
||||||
//Test word-break spellchecker
|
//Test word-break spellchecker
|
||||||
query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)",
|
query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)",
|
||||||
|
|
|
@ -61,11 +61,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
assertU((adoc("id", "2", "lowerfilt", "This is a document")));
|
assertU((adoc("id", "2", "lowerfilt", "This is a document")));
|
||||||
assertU((adoc("id", "3", "lowerfilt", "another document")));
|
assertU((adoc("id", "3", "lowerfilt", "another document")));
|
||||||
//bunch of docs that are variants on blue
|
//bunch of docs that are variants on blue
|
||||||
assertU((adoc("id", "4", "lowerfilt", "blue")));
|
assertU((adoc("id", "4", "lowerfilt", "this blue")));
|
||||||
assertU((adoc("id", "5", "lowerfilt", "blud")));
|
assertU((adoc("id", "5", "lowerfilt", "this blud")));
|
||||||
assertU((adoc("id", "6", "lowerfilt", "boue")));
|
assertU((adoc("id", "6", "lowerfilt", "this boue")));
|
||||||
assertU((adoc("id", "7", "lowerfilt", "glue")));
|
assertU((adoc("id", "7", "lowerfilt", "this glue")));
|
||||||
assertU((adoc("id", "8", "lowerfilt", "blee")));
|
assertU((adoc("id", "8", "lowerfilt", "this blee")));
|
||||||
assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
|
assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
|
||||||
assertU((commit()));
|
assertU((commit()));
|
||||||
}
|
}
|
||||||
|
@ -79,6 +79,58 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaximumResultsForSuggest() throws Exception {
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
|
||||||
|
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
fail("there should have been no suggestions (6<7)");
|
||||||
|
} catch(Exception e) {
|
||||||
|
//correctly threw exception
|
||||||
|
}
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
|
||||||
|
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
fail("there should have been no suggestions ((.8 * 8)<7)");
|
||||||
|
} catch(Exception e) {
|
||||||
|
//correctly threw exception
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0 TO 9]",
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
|
||||||
|
,"/spellcheck/suggestions/[0]=='brwn'"
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
|
||||||
|
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
|
||||||
|
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
|
||||||
|
,"/spellcheck/suggestions/[1]/numFound==1"
|
||||||
|
);
|
||||||
|
fail("there should have been no suggestions ((.64 * 10)<7)");
|
||||||
|
} catch(Exception e) {
|
||||||
|
//correctly threw exception
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtendedResultsCount() throws Exception {
|
public void testExtendedResultsCount() throws Exception {
|
||||||
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")
|
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")
|
||||||
|
|
|
@ -52,8 +52,10 @@ public interface SpellingParams {
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
* The maximum number of hits the request can return in order to both
|
* The maximum number of hits the request can return in order to both
|
||||||
* generate spelling suggestions and set the "correctlySpelled" element to "false".
|
* generate spelling suggestions and set the "correctlySpelled" element to "false". This can be specified
|
||||||
* Note that this parameter is typically of use only in conjunction with "spellcheck.alternativeTermCount".
|
* either as a whole number of documents, or it can be expressed as a fractional percentage
|
||||||
|
* of documents returned by a chosen filter query. By default, the chosen filter is the most restrictive
|
||||||
|
* fq clause. This can be overridden with {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ} .
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions
|
* If left unspecified, the default behavior will prevail. That is, "correctlySpelled" will be false and suggestions
|
||||||
|
@ -66,6 +68,14 @@ public interface SpellingParams {
|
||||||
*/
|
*/
|
||||||
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
|
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
|
||||||
|
|
||||||
|
/**
|
||||||
|
*<p>
|
||||||
|
* To be used when {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST} is expressed as a fractional percentage.
|
||||||
|
* Specify a filter query whose result count is used to determine the maximum number of documents.
|
||||||
|
*</p>
|
||||||
|
*/
|
||||||
|
public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ = SPELLCHECK_MAX_RESULTS_FOR_SUGGEST + ".fq";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When this parameter is set to true and the misspelled word exists in the
|
* When this parameter is set to true and the misspelled word exists in the
|
||||||
* user field, only words that occur more frequently in the Solr field than
|
* user field, only words that occur more frequently in the Solr field than
|
||||||
|
|
Loading…
Reference in New Issue