mirror of https://github.com/apache/lucene.git
SOLR-3240: add "spellcheck.collateMaxCollectDocs" for estimating collation hit-counts.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1479638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a57bc586e2
commit
7141cb35a7
|
@ -66,6 +66,12 @@ New Features
|
|||
* SOLR-4761: Add option to plugin a merged segment warmer into solrconfig.xml
|
||||
(Mark Miller, Mike McCandless, Robert Muir)
|
||||
|
||||
* SOLR-3240: Add "spellcheck.collateMaxCollectDocs" option so that when testing
|
||||
potential Collations against the index, SpellCheckComponent will only collect
|
||||
n documents, thereby estimating the hit-count. This is a performance optimization
|
||||
in cases where exact hit-counts are unnecessary. Also, when "collateExtendedResults"
|
||||
is false, this optimization is always made (James Dyer).
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -214,10 +214,20 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
|
||||
int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
|
||||
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
|
||||
int maxCollationCollectDocs = params.getInt(SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, 0);
|
||||
// If not reporting hits counts, don't bother collecting more than 1 document per try.
|
||||
if (!collationExtendedResults) {
|
||||
maxCollationCollectDocs = 1;
|
||||
}
|
||||
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
|
||||
|
||||
SpellCheckCollator collator = new SpellCheckCollator();
|
||||
List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations, suggestionsMayOverlap);
|
||||
SpellCheckCollator collator = new SpellCheckCollator()
|
||||
.setMaxCollations(maxCollations)
|
||||
.setMaxCollationTries(maxCollationTries)
|
||||
.setMaxCollationEvaluations(maxCollationEvaluations)
|
||||
.setSuggestionsMayOverlap(suggestionsMayOverlap)
|
||||
.setDocCollectionLimit(maxCollationCollectDocs)
|
||||
.setReportHits(collationExtendedResults);
|
||||
List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb);
|
||||
//by sorting here we guarantee a non-distributed request returns all
|
||||
//results in the same order as a distributed request would,
|
||||
//even in cases when the internal rank is the same.
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
/**
|
||||
* <p>
|
||||
* A wrapper {@link Collector} that throws {@link EarlyTerminatingCollectorException})
|
||||
* once a specified maximum number of documents are collected.
|
||||
* </p>
|
||||
*/
|
||||
public class EarlyTerminatingCollector extends Collector {
|
||||
private int numCollected;
|
||||
private int lastDocId = -1;
|
||||
private int maxDocsToCollect;
|
||||
private Collector delegate;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Wraps a {@link Collector}, throwing {@link EarlyTerminatingCollectorException}
|
||||
* once the specified maximum is reached.
|
||||
* </p>
|
||||
* @param delegate - the Collector to wrap.
|
||||
* @param maxDocsToCollect - the maximum number of documents to Collect
|
||||
*
|
||||
*/
|
||||
public EarlyTerminatingCollector(Collector delegate, int maxDocsToCollect) {
|
||||
this.delegate = delegate;
|
||||
this.maxDocsToCollect = maxDocsToCollect;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return delegate.acceptsDocsOutOfOrder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
delegate.collect(doc);
|
||||
lastDocId = doc;
|
||||
numCollected++;
|
||||
if(numCollected==maxDocsToCollect) {
|
||||
throw new EarlyTerminatingCollectorException(numCollected, lastDocId);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
delegate.setNextReader(context);
|
||||
}
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
delegate.setScorer(scorer);
|
||||
}
|
||||
public int getNumCollected() {
|
||||
return numCollected;
|
||||
}
|
||||
public void setNumCollected(int numCollected) {
|
||||
this.numCollected = numCollected;
|
||||
}
|
||||
public int getLastDocId() {
|
||||
return lastDocId;
|
||||
}
|
||||
public void setLastDocId(int lastDocId) {
|
||||
this.lastDocId = lastDocId;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* Thrown by {@link EarlyTerminatingCollector} when the maximum to abort
|
||||
* the scoring / collection process early, when the specified maximum number
|
||||
* of documents were collected.
|
||||
*/
|
||||
public class EarlyTerminatingCollectorException extends RuntimeException {
|
||||
private static final long serialVersionUID = 5939241340763428118L;
|
||||
private int lastDocId = -1;
|
||||
private int numberCollected;
|
||||
|
||||
public EarlyTerminatingCollectorException(int numberCollected, int lastDocId) {
|
||||
this.numberCollected = numberCollected;
|
||||
this.lastDocId = lastDocId;
|
||||
}
|
||||
public int getLastDocId() {
|
||||
return lastDocId;
|
||||
}
|
||||
public void setLastDocId(int lastDocId) {
|
||||
this.lastDocId = lastDocId;
|
||||
}
|
||||
public int getNumberCollected() {
|
||||
return numberCollected;
|
||||
}
|
||||
public void setNumberCollected(int numberCollected) {
|
||||
this.numberCollected = numberCollected;
|
||||
}
|
||||
}
|
|
@ -99,6 +99,7 @@ import org.apache.solr.request.UnInvertedField;
|
|||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.spelling.QueryConverter;
|
||||
import org.apache.solr.update.SolrIndexConfig;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -1235,7 +1236,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
public static final int GET_DOCSET = 0x40000000;
|
||||
static final int NO_CHECK_FILTERCACHE = 0x20000000;
|
||||
static final int NO_SET_QCACHE = 0x10000000;
|
||||
|
||||
public static final int TERMINATE_EARLY = 0x04;
|
||||
public static final int FORCE_INORDER_COLLECTION = 0x08;
|
||||
public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response
|
||||
public static final int GET_SCORES = 0x01;
|
||||
|
||||
|
@ -1394,7 +1396,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
float[] scores;
|
||||
|
||||
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
|
||||
|
||||
boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
|
||||
|
||||
Query query = QueryUtils.makeQueryable(cmd.getQuery());
|
||||
|
||||
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
|
||||
|
@ -1446,7 +1449,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
if (terminateEarly) {
|
||||
collector = new EarlyTerminatingCollector(collector, cmd.len);
|
||||
}
|
||||
if( timeAllowed > 0 ) {
|
||||
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
|
||||
}
|
||||
|
@ -1481,6 +1486,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
|
||||
}
|
||||
Collector collector = topCollector;
|
||||
if (terminateEarly) {
|
||||
collector = new EarlyTerminatingCollector(collector, cmd.len);
|
||||
}
|
||||
if( timeAllowed > 0 ) {
|
||||
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
|
||||
}
|
||||
|
@ -1529,6 +1537,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
DocSet set;
|
||||
|
||||
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
|
||||
boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
|
||||
int maxDoc = maxDoc();
|
||||
int smallSetSize = maxDoc>>6;
|
||||
|
||||
|
@ -1568,7 +1577,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (terminateEarly) {
|
||||
collector = new EarlyTerminatingCollector(collector, cmd.len);
|
||||
}
|
||||
if( timeAllowed > 0 ) {
|
||||
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
|
||||
}
|
||||
|
@ -1604,7 +1615,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
|
||||
DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector);
|
||||
Collector collector = setCollector;
|
||||
|
||||
if (terminateEarly) {
|
||||
collector = new EarlyTerminatingCollector(collector, cmd.len);
|
||||
}
|
||||
if( timeAllowed > 0 ) {
|
||||
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed );
|
||||
}
|
||||
|
|
|
@ -22,7 +22,9 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.DisMaxParams;
|
||||
import org.apache.solr.common.params.GroupParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -33,15 +35,23 @@ import org.apache.solr.handler.component.ResponseBuilder;
|
|||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.search.EarlyTerminatingCollectorException;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class SpellCheckCollator {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
|
||||
private int maxCollations = 1;
|
||||
private int maxCollationTries = 0;
|
||||
private int maxCollationEvaluations = 10000;
|
||||
private boolean suggestionsMayOverlap = false;
|
||||
private int docCollectionLimit = 0;
|
||||
private boolean reportHits = true;
|
||||
|
||||
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
|
||||
int maxCollations, int maxTries, int maxEvaluations, boolean suggestionsMayOverlap) {
|
||||
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
|
||||
public List<SpellCheckCollation> collate(SpellingResult result,
|
||||
String originalQuery, ResponseBuilder ultimateResponse) {
|
||||
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
|
||||
|
||||
QueryComponent queryComponent = null;
|
||||
if (ultimateResponse.components != null) {
|
||||
|
@ -54,6 +64,7 @@ public class SpellCheckCollator {
|
|||
}
|
||||
|
||||
boolean verifyCandidateWithQuery = true;
|
||||
int maxTries = maxCollationTries;
|
||||
int maxNumberToIterate = maxTries;
|
||||
if (maxTries < 1) {
|
||||
maxTries = 1;
|
||||
|
@ -65,10 +76,17 @@ public class SpellCheckCollator {
|
|||
maxTries = 1;
|
||||
verifyCandidateWithQuery = false;
|
||||
}
|
||||
docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
|
||||
int maxDocId = -1;
|
||||
if (verifyCandidateWithQuery && docCollectionLimit > 0) {
|
||||
IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
|
||||
maxDocId = reader.maxDoc();
|
||||
}
|
||||
|
||||
int tryNo = 0;
|
||||
int collNo = 0;
|
||||
PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxEvaluations, suggestionsMayOverlap);
|
||||
PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(),
|
||||
maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
|
||||
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
|
||||
|
||||
PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
|
||||
|
@ -96,12 +114,25 @@ public class SpellCheckCollator {
|
|||
}
|
||||
params.set(CommonParams.Q, collationQueryStr);
|
||||
params.remove(CommonParams.START);
|
||||
params.set(CommonParams.ROWS, "" + docCollectionLimit);
|
||||
// we don't want any stored fields
|
||||
params.set(CommonParams.FL, "id");
|
||||
params.set(CommonParams.ROWS, "0");
|
||||
// we'll sort by doc id to ensure no scoring is done.
|
||||
params.set(CommonParams.SORT, "_docid_ asc");
|
||||
// If a dismax query, don't add unnecessary clauses for scoring
|
||||
params.remove(DisMaxParams.TIE);
|
||||
params.remove(DisMaxParams.PF);
|
||||
params.remove(DisMaxParams.PF2);
|
||||
params.remove(DisMaxParams.PF3);
|
||||
params.remove(DisMaxParams.BQ);
|
||||
params.remove(DisMaxParams.BF);
|
||||
// Collate testing does not support Grouping (see SOLR-2577)
|
||||
params.remove(GroupParams.GROUP);
|
||||
|
||||
// creating a request here... make sure to close it!
|
||||
ResponseBuilder checkResponse = new ResponseBuilder(new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent));
|
||||
ResponseBuilder checkResponse = new ResponseBuilder(
|
||||
new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),
|
||||
new SolrQueryResponse(), Arrays.<SearchComponent> asList(queryComponent));
|
||||
checkResponse.setQparser(ultimateResponse.getQparser());
|
||||
checkResponse.setFilters(ultimateResponse.getFilters());
|
||||
checkResponse.setQueryString(collationQueryStr);
|
||||
|
@ -109,8 +140,23 @@ public class SpellCheckCollator {
|
|||
|
||||
try {
|
||||
queryComponent.prepare(checkResponse);
|
||||
if (docCollectionLimit > 0) {
|
||||
int f = checkResponse.getFieldFlags();
|
||||
checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
|
||||
if (reportHits) {
|
||||
f = checkResponse.getFieldFlags();
|
||||
checkResponse.setFieldFlags(f |= SolrIndexSearcher.FORCE_INORDER_COLLECTION);
|
||||
}
|
||||
}
|
||||
queryComponent.process(checkResponse);
|
||||
hits = (Integer) checkResponse.rsp.getToLog().get("hits");
|
||||
} catch (EarlyTerminatingCollectorException etce) {
|
||||
assert (docCollectionLimit > 0);
|
||||
if (etce.getLastDocId() + 1 == maxDocId) {
|
||||
hits = docCollectionLimit;
|
||||
} else {
|
||||
hits = maxDocId / ((etce.getLastDocId() + 1) / docCollectionLimit);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
|
||||
} finally {
|
||||
|
@ -191,6 +237,31 @@ public class SpellCheckCollator {
|
|||
offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset());
|
||||
}
|
||||
return collation.toString();
|
||||
}
|
||||
public SpellCheckCollator setMaxCollations(int maxCollations) {
|
||||
this.maxCollations = maxCollations;
|
||||
return this;
|
||||
}
|
||||
public SpellCheckCollator setMaxCollationTries(int maxCollationTries) {
|
||||
this.maxCollationTries = maxCollationTries;
|
||||
return this;
|
||||
}
|
||||
public SpellCheckCollator setMaxCollationEvaluations(
|
||||
int maxCollationEvaluations) {
|
||||
this.maxCollationEvaluations = maxCollationEvaluations;
|
||||
return this;
|
||||
}
|
||||
public SpellCheckCollator setSuggestionsMayOverlap(
|
||||
boolean suggestionsMayOverlap) {
|
||||
this.suggestionsMayOverlap = suggestionsMayOverlap;
|
||||
return this;
|
||||
}
|
||||
public SpellCheckCollator setDocCollectionLimit(int docCollectionLimit) {
|
||||
this.docCollectionLimit = docCollectionLimit;
|
||||
return this;
|
||||
}
|
||||
public SpellCheckCollator setReportHits(boolean reportHits) {
|
||||
this.reportHits = reportHits;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -42,21 +42,24 @@ import org.junit.Test;
|
|||
public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love")));
|
||||
initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves")));
|
||||
assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
|
||||
assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
|
||||
assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word")));
|
||||
assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
|
||||
assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
|
||||
assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
|
||||
assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
|
||||
assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
|
||||
assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
|
||||
assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine")));
|
||||
assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
|
||||
assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
|
||||
assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
|
||||
assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
|
||||
assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
|
||||
assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
|
||||
assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(adoc("id", "14", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(adoc("id", "15", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(adoc("id", "16", "teststop", "metanoia")));
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
}
|
||||
|
||||
|
@ -430,4 +433,71 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
|
|||
);
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testEstimatedHitCounts() throws Exception {
|
||||
assertQ(
|
||||
req(
|
||||
SpellCheckComponent.COMPONENT_NAME, "true",
|
||||
SpellCheckComponent.SPELLCHECK_DICT, "direct",
|
||||
SpellingParams.SPELLCHECK_COUNT, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE, "true",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
|
||||
"qt", "spellCheckCompRH",
|
||||
CommonParams.Q, "teststop:metnoia"
|
||||
),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"
|
||||
);
|
||||
assertQ(
|
||||
req(
|
||||
SpellCheckComponent.COMPONENT_NAME, "true",
|
||||
SpellCheckComponent.SPELLCHECK_DICT, "direct",
|
||||
SpellingParams.SPELLCHECK_COUNT, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE, "true",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
|
||||
SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "1",
|
||||
"qt", "spellCheckCompRH",
|
||||
CommonParams.Q, "teststop:metnoia"
|
||||
),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=17"
|
||||
);
|
||||
assertQ(
|
||||
req(
|
||||
SpellCheckComponent.COMPONENT_NAME, "true",
|
||||
SpellCheckComponent.SPELLCHECK_DICT, "direct",
|
||||
SpellingParams.SPELLCHECK_COUNT, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE, "true",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
|
||||
SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "3",
|
||||
"qt", "spellCheckCompRH",
|
||||
CommonParams.Q, "teststop:metnoia"
|
||||
),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=4"
|
||||
);
|
||||
assertQ(
|
||||
req(
|
||||
SpellCheckComponent.COMPONENT_NAME, "true",
|
||||
SpellCheckComponent.SPELLCHECK_DICT, "direct",
|
||||
SpellingParams.SPELLCHECK_COUNT, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE, "true",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
|
||||
SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
|
||||
SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
|
||||
SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "100",
|
||||
"qt", "spellCheckCompRH",
|
||||
CommonParams.Q, "teststop:metnoia"
|
||||
),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -129,13 +129,32 @@ public interface SpellingParams {
|
|||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations";
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* For use with {@link SpellingParams#SPELLCHECK_MAX_COLLATION_TRIES} and
|
||||
* {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS}.
|
||||
* A performance optimization in cases where the exact number of hits a collation would return is not needed.
|
||||
* Specify "0" to return the exact # of hits, otherwise give the maximum documents Lucene should collect
|
||||
* with which to base an estimate. The higher the value the more likely the estimates will be accurate
|
||||
* (at expense of performance).
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* The default is 0 (report exact hit-counts) when {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is TRUE.
|
||||
* When {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is FALSE, this optimization is always performed.
|
||||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_COLLATE_MAX_COLLECT_DOCS = SPELLCHECK_PREFIX + "collateMaxCollectDocs";
|
||||
/**
|
||||
* <p>
|
||||
* Whether to use the Extended Results Format for collations.
|
||||
* Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK. did you mean POLK?"
|
||||
* Also indicates the # of hits each collation will return on re-query. Default=false, which retains 1.4-compatible output.
|
||||
* </p>
|
||||
* <p>
|
||||
* Note: that if {@link SpellingParams#SPELLCHECK_COLLATE_MAX_COLLECT_DOCS} is set to a value greater than 0,
|
||||
* then the hit counts returned by this will be estimated.
|
||||
* </p>
|
||||
*/
|
||||
public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";
|
||||
|
||||
|
|
Loading…
Reference in New Issue