SOLR-14413 allow timeAllowed and cursorMark parameters

closes #1436
This commit is contained in:
John Gallagher 2020-04-17 12:51:35 -04:00 committed by Mike Drob
parent 8c234b2879
commit 70f461ee45
No known key found for this signature in database
GPG Key ID: 3E48C0C6EF362B9E
8 changed files with 103 additions and 22 deletions

View File

@ -226,6 +226,8 @@ Improvements
* SOLR-14987: Reuse HttpSolrClient per node vs. one per Solr core when using CloudSolrStream (Timothy Potter)
* SOLR-14413: Allow timeAllowed and cursorMark parameters together. (John Gallagher via Mike Drob)
Optimizations
---------------------
* SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant)

View File

@ -358,11 +358,6 @@ public class QueryComponent extends SearchComponent
// -1 as flag if not set.
long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
if (null != rb.getCursorMark() && 0 < timeAllowed) {
// fundamentally incompatible
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
}
QueryCommand cmd = rb.createQueryCommand();
cmd.setTimeAllowed(timeAllowed);

View File

@ -34,6 +34,7 @@ import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
@ -379,6 +380,13 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware,
log.warn("Query: {}; ", req.getParamString(), ex);
if( rb.rsp.getResponse() == null) {
rb.rsp.addResponse(new SolrDocumentList());
// If a cursorMark was passed, and we didn't progress, set
// the nextCursorMark to the same position
String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
if (null != cursorStr) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
}
}
if(rb.isDebug()) {
NamedList debug = new NamedList();

View File

@ -30,12 +30,21 @@
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<searchComponent name="delayingSearchComponent"
class="org.apache.solr.search.DelayingSearchComponent"/>
<requestHandler name="/select" class="solr.SearchHandler">
<arr name="first-components">
<str>delayingSearchComponent</str>
</arr>
</requestHandler>
<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
</updateHandler>
<!-- deep paging better play nice with caching -->
<query>
<!-- no wautowarming, it screws up our ability to sanity check cache stats in tests -->

View File

@ -25,6 +25,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.TestUtil;
@ -46,6 +48,7 @@ import org.junit.BeforeClass;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_NEXT;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_PARAM;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import static org.apache.solr.common.params.CommonParams.TIME_ALLOWED;
import static org.apache.solr.common.util.Utils.fromJSONString;
/**
@ -104,13 +107,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
CURSOR_MARK_PARAM, CURSOR_MARK_START),
ErrorCode.BAD_REQUEST, "_docid_");
// using cursor w/ timeAllowed
assertFail(params("q", "*:*",
"sort", "id desc",
CommonParams.TIME_ALLOWED, "1000",
CURSOR_MARK_PARAM, CURSOR_MARK_START),
ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);
// using cursor w/ grouping
assertFail(params("q", "*:*",
"sort", "id desc",
@ -499,6 +495,83 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
));
}
/**
* test that timeAllowed parameter can be used with cursors
* uses DelayingSearchComponent in solrconfig-deeppaging.xml
*/
public void testTimeAllowed() throws Exception {
String wontExceedTimeout = "10000";
int numDocs = 1000;
List<String> ids = IntStream.range(0, 1000).mapToObj(String::valueOf).collect(Collectors.toList());
// Shuffle to test ordering
Collections.shuffle(ids, random());
for (String id : ids) {
assertU(adoc("id", id, "name", "a" + id));
if (random().nextInt(numDocs) == 0) {
assertU(commit()); // sometimes make multiple segments
}
}
assertU(commit());
Collections.sort(ids);
String cursorMark, nextCursorMark = CURSOR_MARK_START;
SolrParams params = params("q", "name:a*",
"fl", "id",
"sort", "id asc",
"rows", "50",
"sleep", "10");
List<String> foundDocIds = new ArrayList<>();
String[] timeAllowedVariants = {"1", "50", wontExceedTimeout};
int partialCount = 0;
do {
cursorMark = nextCursorMark;
for (String timeAllowed : timeAllowedVariants) {
// execute the query
String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark, TIME_ALLOWED, timeAllowed));
@SuppressWarnings({"unchecked"})
Map<Object, Object> response = (Map<Object, Object>) fromJSONString(json);
@SuppressWarnings({"unchecked"})
Map<Object, Object> responseHeader = (Map<Object, Object>) response.get("responseHeader");
@SuppressWarnings({"unchecked"})
Map<Object, Object> responseBody = (Map<Object, Object>) response.get("response");
nextCursorMark = (String) response.get(CURSOR_MARK_NEXT);
// count occurance of partialResults (confirm at the end at least one)
if (responseHeader.containsKey("partialResults")) {
partialCount++;
}
// add the ids found (confirm we found all at the end in correct order)
@SuppressWarnings({"unchecked"})
List<Map<Object, Object>> docs = (List<Map<Object, Object>>) (responseBody.get("docs"));
for (Map<Object, Object> doc : docs) {
foundDocIds.add(doc.get("id").toString());
}
// break out of the timeAllowed variants as soon as we progress
if (!cursorMark.equals(nextCursorMark)) {
break;
}
}
} while (!cursorMark.equals(nextCursorMark));
List<String> sortedFoundDocIds = new ArrayList<>();
sortedFoundDocIds.addAll(foundDocIds);
Collections.sort(sortedFoundDocIds);
// Note: it is not guaranteed that all docs will be found, because a query may time out
// before reaching all segments, this causes documents in the skipped segements to be skipped
// in the overall result set as the cursor pages through.
assertEquals("Should have found last doc id eventually", ids.get(ids.size() -1), foundDocIds.get(foundDocIds.size() -1));
assertEquals("Documents arrived in sorted order within and between pages", sortedFoundDocIds, foundDocIds);
assertTrue("Should have experienced at least one partialResult", partialCount > 0);
}
/**
* test that our assumptions about how caches are affected hold true
*/

View File

@ -132,13 +132,6 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
CURSOR_MARK_PARAM, CURSOR_MARK_START),
ErrorCode.BAD_REQUEST, "_docid_");
// using cursor w/ timeAllowed
assertFail(params("q", "*:*",
"sort", "id desc",
CommonParams.TIME_ALLOWED, "1000",
CURSOR_MARK_PARAM, CURSOR_MARK_START),
ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);
// using cursor w/ grouping
assertFail(params("q", "*:*",
"sort", "id desc",

View File

@ -206,7 +206,7 @@ The default value of this parameter is blank, which causes no extra "explain inf
== timeAllowed Parameter
This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, <<faceting.adoc#faceting,facet>> counts, and result <<the-stats-component.adoc#the-stats-component,stats>> may not be accurate for the entire result set. In case of expiration, if `omitHeader` isn't set to `true` the response header contains a special flag called `partialResults`.
This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, <<faceting.adoc#faceting,facet>> counts, and result <<the-stats-component.adoc#the-stats-component,stats>> may not be accurate for the entire result set. In case of expiration, if `omitHeader` isn't set to `true` the response header contains a special flag called `partialResults`. When using `timeAllowed` in combination with <<pagination-of-results.adoc#using-cursors,`cursorMark`>>, and the `partialResults` flag is present, some matching documents may have been skipped in the result set. Additionally, if the `partialResults` flag is present, `cursorMark` can match `nextCursorMark` even if there may be more results
[source,json]
----
@ -252,7 +252,7 @@ The default value of this parameter is `false`.
This parameter may be set to either `true` or `false`.
If set to `true`, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is `false`.
If set to `true`, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is `false`. When using parameters such as <<common-query-parameters.adoc#timeallowed-parameter,`timeAllowed`>>, and <<solrcloud-query-routing-and-read-tolerance.adoc#shards-tolerant-parameter,`shards.tolerant`>>, which can lead to partial results, it is advisable to keep the header, so that the `partialResults` flag can be checked, and values such as `numFound`, `nextCursorMark`, <<faceting.adoc#faceting,Facet>> counts, and result <<the-stats-component.adoc#the-stats-component,Stats>> can be interpreted in the context of partial results.
== wt Parameter

View File

@ -97,6 +97,7 @@ There are a few important constraints to be aware of when using `cursorMark` par
. `cursorMark` and `start` are mutually exclusive parameters.
* Your requests must either not include a `start` parameter, or it must be specified with a value of "```0```".
. When using the <<common-query-parameters.adoc#timeallowed-parameter,`timeAllowed` request param>>, partial results may be returned. If time expires before the search is complete - as indicated when the `responseHeader` includes `"partialResults": true`, some matching documents may have been skipped. Additionally, if `cursorMark` matches `nextCursorMark`, you cannot be sure that there are no more results. In these situation, consider increasing `timeAllowed` and reissuing the query. When the `responseHeader` no longer includes `"partialResults": true` and `cursorMark` matches `nextCursorMark`, there are no more results.
. `sort` clauses must include the uniqueKey field (either `asc` or `desc`).
* If `id` is your uniqueKey field, then sort parameters like `id asc` and `name asc, id desc` would both work fine, but `name asc` by itself would not
. Sorts including <<working-with-dates.adoc#working-with-dates,Date Math>> based functions that involve calculations relative to `NOW` will cause confusing results, since every document will get a new sort value on every subsequent request. This can easily result in cursors that never end, and constantly return the same documents over and over even if the documents are never updated.