SOLR-5463: new 'cursorMark' request param for deep paging of sorted result sets

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1556036 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-01-06 21:15:45 +00:00
parent 5bf2428e9d
commit 5efc4132f1
19 changed files with 2573 additions and 152 deletions

View File

@ -53,6 +53,9 @@ New Features
Andrzej Bialecki, Patrick Hunt, Wolfgang Hoschek, Roman Shaposhnik,
Eric Wong)
* SOLR-5463: new 'cursorMark' request param for deep paging of sorted result sets
(sarowe, hossman)
Other Changes
----------------------

View File

@ -17,8 +17,6 @@
package org.apache.solr.handler.component;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.ReaderUtil;
@ -33,12 +31,12 @@ import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.*;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
@ -48,6 +46,7 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.CursorMark;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
@ -82,6 +81,8 @@ import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransfo
import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.commons.lang.StringUtils;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
@ -147,7 +148,14 @@ public class QueryComponent extends SearchComponent
rb.setQuery( q );
rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser);
rb.setScoreDoc(parser.getPaging());
final String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
if (null != cursorStr) {
final CursorMark cursorMark = new CursorMark(rb.req.getSchema(),
rb.getSortSpec());
cursorMark.parseSerializedTotem(cursorStr);
rb.setCursorMark(cursorMark);
}
String[] fqs = req.getParams().getParams(CommonParams.FQ);
if (fqs!=null && fqs.length!=0) {
@ -171,9 +179,21 @@ public class QueryComponent extends SearchComponent
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
boolean grouping = params.getBool(GroupParams.GROUP, false);
if (!grouping) {
return;
if (params.getBool(GroupParams.GROUP, false)) {
prepareGrouping(rb);
}
}
private void prepareGrouping(ResponseBuilder rb) throws IOException {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (null != rb.getCursorMark()) {
// It's hard to imagine, conceptually, what it would mean to combine
// grouping with a cursor - so for now we just don't allow the combination at all
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not use Grouping with " +
CursorMarkParams.CURSOR_MARK_PARAM);
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
@ -242,6 +262,11 @@ public class QueryComponent extends SearchComponent
// -1 as flag if not set.
long timeAllowed = (long)params.getInt( CommonParams.TIME_ALLOWED, -1 );
if (null != rb.getCursorMark() && 0 < timeAllowed) {
// fundamentally incompatible
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
}
// Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to
@ -434,13 +459,18 @@ public class QueryComponent extends SearchComponent
searcher.search(result,cmd);
rb.setResult( result );
ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = rb.getQuery();
rsp.add("response", ctx);
rsp.getToLog().add("hits", rb.getResults().docList.matches());
if ( ! rb.req.getParams().getBool(ShardParams.IS_SHARD,false) ) {
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.getNextCursorMark().getSerializedTotem());
}
}
doFieldSortValues(rb, searcher);
doPrefetch(rb);
}
@ -452,6 +482,8 @@ public class QueryComponent extends SearchComponent
// The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
// currently have an option to return sort field values. Because of this, we
// take the documents given and re-derive the sort values.
//
// TODO: See SOLR-5595
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
if(fsv){
NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields
@ -696,6 +728,10 @@ public class QueryComponent extends SearchComponent
}
rb.rsp.add("response", rb._responseDocs);
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.getNextCursorMark().getSerializedTotem());
}
}
private void createDistributedIdf(ResponseBuilder rb) {
@ -904,11 +940,66 @@ public class QueryComponent extends SearchComponent
// TODO: use ResponseBuilder (w/ comments) or the request context?
rb.resultIds = resultIds;
rb._responseDocs = responseDocs;
populateNextCursorMarkFromMergedShards(rb);
if (partialResults) {
rb.rsp.getResponseHeader().add( "partialResults", Boolean.TRUE );
}
}
/**
 * Derives the "next" {@link CursorMark} for a distributed request from the
 * merged shard documents and stores it via
 * {@link ResponseBuilder#setNextCursorMark}.
 * <p>
 * The sort values of the document with the greatest
 * <code>positionInResponse</code> on the current page become the values of
 * the next cursor. If the page is empty, the incoming cursor is re-used
 * unchanged so the client can resume later.
 * </p>
 *
 * @param rb A <code>ResponseBuilder</code> whose <code>resultIds</code> already
 *           holds the merged <code>ShardDocs</code>; when the request has no
 *           cursor this method does nothing.
 */
private void populateNextCursorMarkFromMergedShards(ResponseBuilder rb) {
  final CursorMark currentMark = rb.getCursorMark();
  if (null == currentMark) {
    // request is not cursor based, nothing to compute
    return;
  }

  assert null != rb.resultIds : "resultIds was not set in ResponseBuilder";

  final Collection<ShardDoc> pageDocs = rb.resultIds.values();
  if (pageDocs.isEmpty()) {
    // no further matches: hand the same totem back to the client
    rb.setNextCursorMark(currentMark);
    return;
  }

  // resultIds is not iterated in response order here, so scan for the doc
  // that sits last in the merged response
  ShardDoc tail = null;
  for (ShardDoc candidate : pageDocs) {
    if (null != tail && !(tail.positionInResponse < candidate.positionInResponse)) {
      continue;
    }
    tail = candidate;
  }

  final SortField[] clauses = currentMark.getSortSpec().getSort().getSort();
  final List<Object> tailSortValues = new ArrayList<Object>(clauses.length);
  for (int i = 0; i < clauses.length; i++) {
    final SortField clause = clauses[i];
    final Object value;
    if (clause.getType().equals(SortField.Type.SCORE)) {
      assert null != tail.score : "lastDoc has null score";
      value = tail.score;
    } else {
      assert null != clause.getField() : "SortField has null field";
      // per-field list of sort values, indexed by the doc's order within its shard
      List<Object> perDocValues = (List<Object>) tail.sortFieldValues.get(clause.getField());
      value = perDocValues.get(tail.orderInShard);
    }
    tailSortValues.add(value);
  }

  final CursorMark nextMark = currentMark.createNext(tailSortValues);
  assert null != nextMark : "null nextCursorMark";
  rb.setNextCursorMark(nextMark);
}
private NamedList unmarshalSortValues(SortSpec sortSpec,
NamedList sortFieldValues,
IndexSchema schema) {
@ -982,6 +1073,7 @@ public class QueryComponent extends SearchComponent
// no need for a sort, we already have order
sreq.params.remove(CommonParams.SORT);
sreq.params.remove(CursorMarkParams.CURSOR_MARK_PARAM);
// we already have the field sort values
sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);

View File

@ -18,7 +18,6 @@
package org.apache.solr.handler.component;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.util.BytesRef;
@ -30,6 +29,7 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.CursorMark;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrIndexSearcher;
@ -70,9 +70,8 @@ public class ResponseBuilder
private List<Query> filters = null;
private SortSpec sortSpec = null;
private GroupingSpecification groupingSpec;
//used for handling deep paging
private ScoreDoc scoreDoc;
private CursorMark cursorMark;
private CursorMark nextCursorMark;
private DocListAndSet results = null;
private NamedList<Object> debugInfo = null;
@ -395,7 +394,7 @@ public class ResponseBuilder
.setLen(getSortSpec().getCount())
.setFlags(getFieldFlags())
.setNeedDocSet(isNeedDocSet())
.setScoreDoc(getScoreDoc()); //Issue 1726
.setCursorMark(getCursorMark());
return cmd;
}
@ -407,6 +406,10 @@ public class ResponseBuilder
if (result.isPartialResults()) {
rsp.getResponseHeader().add("partialResults", Boolean.TRUE);
}
if (null != cursorMark) {
assert null != result.getNextCursorMark() : "using cursor but no next cursor set";
this.setNextCursorMark(result.getNextCursorMark());
}
}
public long getNumberDocumentsFound() {
@ -416,13 +419,17 @@ public class ResponseBuilder
return _responseDocs.getNumFound();
}
public ScoreDoc getScoreDoc()
{
return scoreDoc;
public CursorMark getCursorMark() {
return cursorMark;
}
public void setCursorMark(CursorMark cursorMark) {
this.cursorMark = cursorMark;
}
public void setScoreDoc(ScoreDoc scoreDoc)
{
this.scoreDoc = scoreDoc;
public CursorMark getNextCursorMark() {
return nextCursorMark;
}
public void setNextCursorMark(CursorMark nextCursorMark) {
this.nextCursorMark = nextCursorMark;
}
}

View File

@ -0,0 +1,309 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import static org.apache.solr.common.params.CursorMarkParams.*;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.PostFilter;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.DelegatingCollector;
import org.apache.commons.lang.StringUtils;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
/**
 * An object that encapsulates the basic information about the current Mark Point of a
 * "Cursor" based request. <code>CursorMark</code> objects track the sort values of
 * the last document returned to a user, so that {@link SolrIndexSearcher} can then
 * be asked to find all documents "after" the values represented by this
 * <code>CursorMark</code>.
 *
 */
public final class CursorMark {

  /**
   * Used for validation and (un)marshalling of sort values
   */
  private final SortSpec sortSpec;

  /**
   * The raw, unmarshalled, sort values (that correspond with the SortFields in the
   * SortSpec) for knowing which docs this cursor should "search after". If this
   * list is null, then we have no specific values to "search after" and we
   * should start from the very beginning of the sorted list of documents matching
   * the query.
   */
  private List<Object> values = null;

  /**
   * for serializing this CursorMark as a String
   */
  private final JavaBinCodec codec = new JavaBinCodec();

  /**
   * Generates an empty CursorMark bound for use with the
   * specified schema and {@link SortSpec}.
   *
   * @param schema used for basic validation
   * @param sortSpec bound to this totem (un)marshalling serialized values
   * @throws SolrException with <code>BAD_REQUEST</code> if the schema has no
   *         uniqueKey, the sort does not include the uniqueKey field, the
   *         offset is non-zero, or the sort uses internal doc ordering; with
   *         <code>SERVER_ERROR</code> if the number of sort clauses does not
   *         match the number of schema fields in the SortSpec
   */
  public CursorMark(IndexSchema schema, SortSpec sortSpec) {

    final SchemaField uniqueKey = schema.getUniqueKeyField();
    if (null == uniqueKey) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              "Cursor functionality is not available unless the IndexSchema defines a uniqueKey field");
    }

    final Sort sort = sortSpec.getSort();
    if (null == sort) {
      // pure score, by definition we don't include the mandatory uniqueKey tie breaker
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              "Cursor functionality requires a sort containing a uniqueKey field tie breaker");
    }

    if (!sortSpec.getSchemaFields().contains(uniqueKey)) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              "Cursor functionality requires a sort containing a uniqueKey field tie breaker");
    }

    if (0 != sortSpec.getOffset()) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              "Cursor functionality requires start=0");
    }

    for (SortField sf : sort.getSort()) {
      if (sf.getType().equals(SortField.Type.DOC)) {
        throw new SolrException(ErrorCode.BAD_REQUEST,
                                "Cursor functionality can not be used with internal doc ordering sort: _docid_");
      }
    }

    // the (un)marshalling loops below index both lists in parallel
    if (sort.getSort().length != sortSpec.getSchemaFields().size()) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "Cursor SortSpec failure: sort length != SchemaFields: "
                              + sort.getSort().length + " != " +
                              sortSpec.getSchemaFields().size());
    }

    this.sortSpec = sortSpec;
    this.values = null;
  }

  /**
   * Generates an empty CursorMark bound for use with the same {@link SortSpec}
   * as the specified existing CursorMark.
   *
   * @param previous Existing CursorMark whose SortSpec will be reused in the new CursorMark.
   * @see #createNext
   */
  private CursorMark(CursorMark previous) {
    this.sortSpec = previous.sortSpec;
    this.values = null;
  }

  /**
   * Generates a new CursorMark bound for use with the same {@link SortSpec}
   * as the current CursorMark but using the new SortValues.
   *
   * @param nextSortValues the (raw, unmarshalled) sort values for the new
   *        CursorMark, see {@link #setSortValues}
   * @return a new CursorMark, this object is not modified
   */
  public CursorMark createNext(List<Object> nextSortValues) {
    final CursorMark next = new CursorMark(this);
    next.setSortValues(nextSortValues);
    return next;
  }

  /**
   * Sets the (raw, unmarshalled) sort values (which must conform to the existing
   * sortSpec) to populate this object. If null, then there is nothing to
   * "search after" and the "first page" of results should be returned.
   */
  public void setSortValues(List<Object> input) {
    if (null == input) {
      this.values = null;
    } else {
      assert input.size() == sortSpec.getSort().getSort().length;
      // defensive copy
      this.values = new ArrayList<Object>(input);
    }
  }

  /**
   * Returns a copy of the (raw, unmarshalled) sort values used by this object, or
   * null if first page of docs should be returned (ie: no sort after)
   */
  public List<Object> getSortValues() {
    // defensive copy
    return null == this.values ? null : new ArrayList<Object>(this.values);
  }

  /**
   * Returns the SortSpec used by this object.
   */
  public SortSpec getSortSpec() {
    return this.sortSpec;
  }

  /**
   * Parses the serialized version of a CursorMark from a client
   * (which must conform to the existing sortSpec) and populates this object.
   * <p>
   * The sort values of this object are only replaced once the whole totem
   * has been parsed successfully; if this method throws, the previous
   * values are left untouched.
   * </p>
   *
   * @param serialized either the "start" marker or a totem previously
   *        produced by {@link #getSerializedTotem}
   * @throws SolrException with <code>BAD_REQUEST</code> if the totem can not
   *         be decoded, decodes to nothing, or does not contain one value
   *         per sort clause
   * @see #getSerializedTotem
   */
  public void parseSerializedTotem(final String serialized) {
    if (CURSOR_MARK_START.equals(serialized)) {
      values = null;
      return;
    }
    final SortField[] sortFields = sortSpec.getSort().getSort();
    final List<SchemaField> schemaFields = sortSpec.getSchemaFields();

    List<Object> pieces = null;
    try {
      final byte[] rawData = Base64.base64ToByteArray(serialized);
      ByteArrayInputStream in = new ByteArrayInputStream(rawData);
      try {
        // a payload that is not a List fails this cast and is reported below
        pieces = (List<Object>) codec.unmarshal(in);
      } finally {
        in.close();
      }
    } catch (Exception ex) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              unparsableTotemMessage(serialized), ex);
    }

    // The totem comes from the client: a payload that decodes to null is
    // bad input, not an internal invariant violation, so reject it
    // explicitly instead of relying on an assert.
    if (null == pieces) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              unparsableTotemMessage(serialized));
    }

    if (sortFields.length != pieces.size()) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
                              CURSOR_MARK_PARAM+" does not work with current sort (wrong size): " + serialized);
    }

    // build into a local list so a failure part way through does not leave
    // this object holding a truncated set of values
    final List<Object> parsed = new ArrayList<Object>(sortFields.length);
    for (int i = 0; i < sortFields.length; i++) {
      final SchemaField curField = schemaFields.get(i);
      Object rawValue = pieces.get(i);

      if (null != curField) {
        FieldType curType = curField.getType();
        rawValue = curType.unmarshalSortValue(rawValue);
      }

      parsed.add(rawValue);
    }
    this.values = parsed;
  }

  /**
   * Builds the error message used whenever a client supplied totem can not
   * be decoded.
   */
  private static String unparsableTotemMessage(final String serialized) {
    return "Unable to parse '"+CURSOR_MARK_PARAM+"' after totem: " +
           "value must either be '"+CURSOR_MARK_START+"' or the " +
           "'"+CURSOR_MARK_NEXT+"' returned by a previous search: "
           + serialized;
  }

  /**
   * Generates a Base64 encoded serialized representation of the sort values
   * encapsulated by this object, for use in cursor requests.
   *
   * @return the "start" marker if this object has no sort values, otherwise
   *         the encoded totem
   * @throws SolrException with <code>SERVER_ERROR</code> if the values can
   *         not be serialized
   * @see #parseSerializedTotem
   */
  public String getSerializedTotem() {
    if (null == this.values) {
      return CURSOR_MARK_START;
    }

    final List<SchemaField> schemaFields = sortSpec.getSchemaFields();
    final ArrayList<Object> marshalledValues = new ArrayList<Object>(values.size()+1);
    for (int i = 0; i < schemaFields.size(); i++) {
      SchemaField fld = schemaFields.get(i);
      Object safeValue = values.get(i);
      if (null != fld) {
        FieldType type = fld.getType();
        safeValue = type.marshalSortValue(safeValue);
      }
      marshalledValues.add(safeValue);
    }

    // TODO: we could also encode info about the SortSpec for error checking:
    // the type/name/dir from the SortFields (or a hashCode to act as a checksum)
    // could help provide more validation beyond just the number of clauses.

    try {
      ByteArrayOutputStream out = new ByteArrayOutputStream(256);
      try {
        codec.marshal(marshalledValues, out);
        byte[] rawData = out.toByteArray();
        return Base64.byteArrayToBase64(rawData, 0, rawData.length);
      } finally {
        out.close();
      }
    } catch (Exception ex) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "Unable to format search after totem", ex);
    }
  }

  /**
   * Returns a synthetically constructed {@link FieldDoc} whose {@link FieldDoc#fields}
   * match the values of this object.
   * <p>
   * Important Notes:
   * </p>
   * <ul>
   *  <li>{@link FieldDoc#doc} will always be set to {@link Integer#MAX_VALUE} so
   *    that the tie breaking logic used by <code>IndexSearcher</code> won't select
   *    the same doc again based on the internal lucene docId when the Solr
   *    <code>uniqueKey</code> value is the same.
   *  </li>
   *  <li>{@link FieldDoc#score} will always be set to 0.0F since it is not used
   *    when applying <code>searchAfter</code> logic. (Even if the sort values themselves
   *    contain scores which are used in the sort)
   *  </li>
   * </ul>
   *
   * @return a {@link FieldDoc} to "search after" or null if the initial
   *         page of results is requested.
   */
  public FieldDoc getSearchAfterFieldDoc() {
    if (null == values) return null;

    return new FieldDoc(Integer.MAX_VALUE, 0.0F, values.toArray());
  }

}

View File

@ -17,7 +17,6 @@
package org.apache.solr.search;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; //Issue 1726
import org.apache.lucene.search.Sort;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
@ -208,37 +207,6 @@ public abstract class QParser {
return nestedParser;
}
/**
* use common params to look up pageScore and pageDoc in global params
* @return the ScoreDoc
*/
public ScoreDoc getPaging() throws SyntaxError
{
return null;
/*** This is not ready for prime-time... see SOLR-1726
String pageScoreS = null;
String pageDocS = null;
pageScoreS = params.get(CommonParams.PAGESCORE);
pageDocS = params.get(CommonParams.PAGEDOC);
if (pageScoreS == null || pageDocS == null)
return null;
int pageDoc = pageDocS != null ? Integer.parseInt(pageDocS) : -1;
float pageScore = pageScoreS != null ? new Float(pageScoreS) : -1;
if(pageDoc != -1 && pageScore != -1){
return new ScoreDoc(pageDoc, pageScore);
}
else {
return null;
}
***/
}
/**
* @param useGlobalParams look up sort, start, rows in global params if not in local params
* @return the sort specification

View File

@ -70,6 +70,7 @@ import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
@ -77,6 +78,7 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
@ -1346,6 +1348,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
key = null; // we won't be caching the result
}
}
cmd.setSupersetMaxDoc(supersetMaxDoc);
// OK, so now we need to generate an answer.
@ -1368,7 +1371,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
}
}
// disable useFilterCache optimization temporarily
if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
@ -1381,11 +1383,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
superset = sortDocSet(out.docSet,cmd.getSort(),supersetMaxDoc);
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
sortDocSet(qr, cmd);
} else {
// do it the normal way...
cmd.setSupersetMaxDoc(supersetMaxDoc);
if ((flags & GET_DOCSET)!=0) {
// this currently conflates returning the docset for the base query vs
// the base query and all filters.
@ -1394,11 +1394,28 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
if (qDocSet!=null && filterCache!=null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(),qDocSet);
} else {
getDocListNC(qr,cmd);
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
}
assert null != out.docList : "docList is null";
}
if (null == cmd.getCursorMark()) {
// Kludge...
// we can't use DocSlice.subset, even though it should be an identity op
// because it gets confused by situations where there are lots of matches, but
// fewer docs in the slice than were requested, (due to the cursor)
// so we have to short circuit the call.
// None of which is really a problem since we can't use caching with
// cursors anyway, but it still looks weird to have to special case this
// behavior based on this condition - hence the long explanation.
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
} else {
// sanity check our cursor assumptions
assert null == superset : "cursor: superset isn't null";
assert 0 == cmd.getOffset() : "cursor: command offset mismatch";
assert 0 == out.docList.offset() : "cursor: docList offset mismatch";
assert cmd.getLen() >= supersetMaxDoc : "cursor: superset len mismatch: " +
cmd.getLen() + " vs " + supersetMaxDoc;
}
// lastly, put the superset in the cache if the size is less than or equal
@ -1408,7 +1425,76 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
}
}
/**
 * Computes the "next" {@link CursorMark} from the sort values of the last
 * document in <code>topDocs</code> and records it with
 * {@link QueryResult#setNextCursorMark}. Does nothing when the command
 * carries no cursor; re-uses the command's cursor when the page is empty.
 *
 * @param qr <code>QueryResult</code> that receives the next cursor mark
 * @param qc <code>QueryCommand</code> supplying the current cursor mark, if any
 * @param topDocs collected docs; must be a <code>TopFieldDocs</code> when a
 *                cursor is in use
 */
private void populateNextCursorMarkFromTopDocs(QueryResult qr, QueryCommand qc,
                                               TopDocs topDocs) {
  // TODO: would be nice to rename & generalize this method for non-cursor cases...
  // ...would be handy to reuse the ScoreDoc/FieldDoc sort vals directly in distrib sort
  // ...but that has non-trivial queryResultCache implications
  // See: SOLR-5595

  final CursorMark currentMark = qc.getCursorMark();
  if (null == currentMark) {
    // not a cursor request
    return;
  }

  // a cursor implies a sort that (at least) includes uniqueKey, so the
  // collector must have produced FieldDocs inside a TopFieldDocs
  assert topDocs instanceof TopFieldDocs : "TopFieldDocs cursor constraint violated";
  final ScoreDoc[] pageDocs = ((TopFieldDocs) topDocs).scoreDocs;

  if (0 == pageDocs.length) {
    // empty page: the cursor does not advance
    qr.setNextCursorMark(currentMark);
    return;
  }

  final ScoreDoc tail = pageDocs[pageDocs.length - 1];
  assert tail instanceof FieldDoc : "FieldDoc cursor constraint violated";

  final List<Object> tailSortValues = Arrays.<Object>asList(((FieldDoc) tail).fields);
  final CursorMark nextMark = currentMark.createNext(tailSortValues);
  assert null != nextMark : "null nextCursorMark";
  qr.setNextCursorMark(nextMark);
}
/**
 * Creates the {@link TopDocsCollector} appropriate for the given command:
 * a score based collector when the command has no sort, otherwise a field
 * based collector that resumes after the command's cursor (if any).
 *
 * @param len the number of docs to return
 * @param cmd The Command whose sort, flags and cursor determine the type of
 *            TopDocsCollector to use.
 * @return the collector to run the search with
 */
private TopDocsCollector buildTopDocsCollector(int len, QueryCommand cmd) throws IOException {

  if (null == cmd.getSort()) {
    assert null == cmd.getCursorMark() : "have cursor but no sort";
    return TopScoreDocCollector.create(len, true);
  }

  // sorted search
  final boolean wantScores = (cmd.getFlags() & GET_SCORES) != 0;
  final Sort sortWithWeights = weightSort(cmd.getSort());
  final CursorMark cursorMark = cmd.getCursorMark();

  // :TODO: make fillFields its own QueryCommand flag? ...
  // ... see comments in populateNextCursorMarkFromTopDocs for cache issues (SOLR-5595)
  final boolean fillFields = (null != cursorMark);

  FieldDoc searchAfter = null;
  if (null != cursorMark) {
    searchAfter = cursorMark.getSearchAfterFieldDoc();
  }

  return TopFieldCollector.create(sortWithWeights, len, searchAfter,
                                  fillFields, wantScores, wantScores, true);
}
private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
final long timeAllowed = cmd.getTimeAllowed();
@ -1503,18 +1589,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
scores = new float[nDocsReturned];
totalHits = numHits[0];
maxScore = totalHits>0 ? topscore[0] : 0.0f;
// no docs on this page, so cursor doesn't change
qr.setNextCursorMark(cmd.getCursorMark());
} else {
TopDocsCollector topCollector;
if (cmd.getSort() == null) {
if(cmd.getScoreDoc() != null) {
topCollector = TopScoreDocCollector.create(len, cmd.getScoreDoc(), true); //create the Collector with InOrderPagingCollector
} else {
topCollector = TopScoreDocCollector.create(len, true);
}
} else {
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
final TopDocsCollector topCollector = buildTopDocsCollector(len, cmd);
Collector collector = topCollector;
if (terminateEarly) {
collector = new EarlyTerminatingCollector(collector, cmd.len);
@ -1539,6 +1617,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
totalHits = topCollector.getTotalHits();
TopDocs topDocs = topCollector.topDocs(0, len);
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
maxScore = totalHits>0 ? topDocs.getMaxScore() : 0.0f;
nDocsReturned = topDocs.scoreDocs.length;
ids = new int[nDocsReturned];
@ -1639,16 +1719,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
scores = new float[nDocsReturned];
totalHits = set.size();
maxScore = totalHits>0 ? topscore[0] : 0.0f;
// no docs on this page, so cursor doesn't change
qr.setNextCursorMark(cmd.getCursorMark());
} else {
TopDocsCollector topCollector;
if (cmd.getSort() == null) {
topCollector = TopScoreDocCollector.create(len, true);
} else {
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
final TopDocsCollector topCollector = buildTopDocsCollector(len, cmd);
DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector);
Collector collector = setCollector;
if (terminateEarly) {
@ -1678,6 +1753,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
assert(totalHits == set.size());
TopDocs topDocs = topCollector.topDocs(0, len);
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
maxScore = totalHits>0 ? topDocs.getMaxScore() : 0.0f;
nDocsReturned = topDocs.scoreDocs.length;
@ -1926,16 +2002,21 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
return qr.getDocListAndSet();
}
protected DocList sortDocSet(DocSet set, Sort sort, int nDocs) throws IOException {
protected void sortDocSet(QueryResult qr, QueryCommand cmd) throws IOException {
DocSet set = qr.getDocListAndSet().docSet;
int nDocs = cmd.getSupersetMaxDoc();
if (nDocs == 0) {
// SOLR-2923
return new DocSlice(0, 0, new int[0], null, 0, 0f);
qr.getDocListAndSet().docList = new DocSlice(0, 0, new int[0], null, set.size(), 0f);
qr.setNextCursorMark(cmd.getCursorMark());
return;
}
// bit of a hack to tell if a set is sorted - do it better in the future.
boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet;
TopDocsCollector topCollector = TopFieldCollector.create(weightSort(sort), nDocs, false, false, false, inOrder);
TopDocsCollector topCollector = buildTopDocsCollector(nDocs, cmd);
DocIterator iter = set.iterator();
int base=0;
@ -1964,7 +2045,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
ids[i] = scoreDoc.doc;
}
return new DocSlice(0,nDocsReturned,ids,null,topDocs.totalHits,0.0f);
qr.getDocListAndSet().docList = new DocSlice(0,nDocsReturned,ids,null,topDocs.totalHits,0.0f);
populateNextCursorMarkFromTopDocs(qr, cmd, topDocs);
}
@ -2188,20 +2270,27 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
private int supersetMaxDoc;
private int flags;
private long timeAllowed = -1;
//Issue 1726 start
private ScoreDoc scoreDoc;
private CursorMark cursorMark;
public ScoreDoc getScoreDoc()
{
return scoreDoc;
public CursorMark getCursorMark() {
return cursorMark;
}
public void setScoreDoc(ScoreDoc scoreDoc)
{
this.scoreDoc = scoreDoc;
/**
 * Sets the cursor for this command. A non-null cursor also turns on the
 * <code>NO_CHECK_QCACHE</code> and <code>NO_SET_QCACHE</code> flags.
 *
 * @param cursorMark the cursor to search after, may be null
 * @return this command, for chaining
 */
public QueryCommand setCursorMark(CursorMark cursorMark) {
  this.cursorMark = cursorMark;
  if (null == cursorMark) {
    return this;
  }

  // With a cursor we can't allow queryResult caching: the cache keys know
  // nothing about the collector that was used.
  //
  // In theory the cache keys could be made aware of the searchAfter FieldDoc,
  // but things like the cache window size would still need to be worked out.
  //
  // Checking the filterCache for non-score based sorts *is* still allowed,
  // because that path runs our paging collector over the entire DocSet.
  this.flags = this.flags | NO_CHECK_QCACHE | NO_SET_QCACHE;
  return this;
}
//Issue 1726 end
// public List<Grouping.Command> groupCommands;
public Query getQuery() { return query; }
public QueryCommand setQuery(Query query) {
@ -2310,6 +2399,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
public static class QueryResult {
private boolean partialResults;
private DocListAndSet docListAndSet;
private CursorMark nextCursorMark;
public Object groupedResults; // TODO: currently for testing
@ -2334,6 +2424,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
public void setDocListAndSet( DocListAndSet listSet ) { docListAndSet = listSet; }
public DocListAndSet getDocListAndSet() { return docListAndSet; }
/**
 * Records the {@link CursorMark} that {@link #getNextCursorMark()} will return.
 *
 * @param next the cursor value to store; stored as-is, no validation is done here
 */
public void setNextCursorMark(CursorMark next) {
this.nextCursorMark = next;
}
/**
 * Returns the value last passed to {@link #setNextCursorMark}, or null if it was never set.
 */
public CursorMark getNextCursorMark() {
return nextCursorMark;
}
}
}

View File

@ -0,0 +1,139 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
NOTE: Tests expect every field in this schema to be sortable.
-->
<schema name="sortable" version="1.5">
<uniqueKey>id</uniqueKey>
<fields>
<field name="id" type="int" />
<field name="_version_" type="long" />
<field name="str" type="str" />
<field name="str_last" type="str_last" />
<field name="str_first" type="str_first" />
<field name="str_dv_last" type="str_dv_last" />
<field name="str_dv_first" type="str_dv_first" />
<field name="bin" type="bin" />
<field name="bin_last" type="bin_last" />
<field name="bin_first" type="bin_first" />
<field name="bin_dv_last" type="bin_dv_last" />
<field name="bin_dv_first" type="bin_dv_first" />
<field name="int" type="int" />
<field name="int_last" type="int_last" />
<field name="int_first" type="int_first" />
<field name="int_dv_last" type="int_dv_last" />
<field name="int_dv_first" type="int_dv_first" />
<field name="long" type="long" />
<field name="long_last" type="long_last" />
<field name="long_first" type="long_first" />
<field name="long_dv_last" type="long_dv_last" />
<field name="long_dv_first" type="long_dv_first" />
<field name="float" type="float" />
<field name="float_last" type="float_last" />
<field name="float_first" type="float_first" />
<field name="float_dv_last" type="float_dv_last" />
<field name="float_dv_first" type="float_dv_first" />
<field name="double" type="double" />
<field name="double_last" type="double_last" />
<field name="double_first" type="double_first" />
<field name="double_dv_last" type="double_dv_last" />
<field name="double_dv_first" type="double_dv_first" />
<!-- ensure function sorts don't mistakenly get interpreted as field sorts
https://issues.apache.org/jira/browse/SOLR-5354?focusedCommentId=13835891&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13835891
-->
<dynamicField name="*" type="str" multiValued="true" />
</fields>
<copyField source="str" dest="str_last" />
<copyField source="str" dest="str_first" />
<copyField source="str" dest="str_dv_last" />
<copyField source="str" dest="str_dv_first" />
<copyField source="bin" dest="bin_last" />
<copyField source="bin" dest="bin_first" />
<copyField source="bin" dest="bin_dv_last" />
<copyField source="bin" dest="bin_dv_first" />
<copyField source="int" dest="int_last" />
<copyField source="int" dest="int_first" />
<copyField source="int" dest="int_dv_last" />
<copyField source="int" dest="int_dv_first" />
<copyField source="long" dest="long_last" />
<copyField source="long" dest="long_first" />
<copyField source="long" dest="long_dv_last" />
<copyField source="long" dest="long_dv_first" />
<copyField source="float" dest="float_last" />
<copyField source="float" dest="float_first" />
<copyField source="float" dest="float_dv_last" />
<copyField source="float" dest="float_dv_first" />
<copyField source="double" dest="double_last" />
<copyField source="double" dest="double_first" />
<copyField source="double" dest="double_dv_last" />
<copyField source="double" dest="double_dv_first" />
<types>
<fieldtype name="str" class="solr.StrField" stored="true" indexed="true" />
<fieldtype name="str_last" class="solr.StrField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="str_first" class="solr.StrField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="str_dv_last" class="solr.StrField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="str_dv_first" class="solr.StrField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="bin" class="solr.SortableBinaryField" stored="true" indexed="true" />
<fieldtype name="bin_last" class="solr.SortableBinaryField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="bin_first" class="solr.SortableBinaryField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="bin_dv_last" class="solr.SortableBinaryField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="bin_dv_first" class="solr.SortableBinaryField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="int" class="solr.TrieIntField" stored="true" indexed="true" />
<fieldtype name="int_last" class="solr.TrieIntField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="int_first" class="solr.TrieIntField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="int_dv_last" class="solr.TrieIntField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="int_dv_first" class="solr.TrieIntField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="long" class="solr.TrieLongField" stored="true" indexed="true" />
<fieldtype name="long_last" class="solr.TrieLongField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="long_first" class="solr.TrieLongField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="long_dv_last" class="solr.TrieLongField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="long_dv_first" class="solr.TrieLongField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="float" class="solr.TrieFloatField" stored="true" indexed="true" />
<fieldtype name="float_last" class="solr.TrieFloatField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="float_first" class="solr.TrieFloatField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="float_dv_last" class="solr.TrieFloatField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="float_dv_first" class="solr.TrieFloatField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="double" class="solr.TrieDoubleField" stored="true" indexed="true" />
<fieldtype name="double_last" class="solr.TrieDoubleField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="double_first" class="solr.TrieDoubleField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="double_dv_last" class="solr.TrieDoubleField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="double_dv_first" class="solr.TrieDoubleField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
</types>
</schema>

View File

@ -0,0 +1,59 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
</updateHandler>
<!-- deep paging better play nice with caching -->
<query>
<!-- no autowarming, it screws up our ability to sanity check cache stats in tests -->
<filterCache class="solr.FastLRUCache" size="50" initialSize="50" autowarmCount="0"/>
<queryResultCache class="solr.LRUCache" size="50" initialSize="50" autowarmCount="0"/>
<queryResultWindowSize>50</queryResultWindowSize>
<queryResultMaxDocsCached>500</queryResultMaxDocsCached>
<!-- randomized so we exercise cursors using various paths in SolrIndexSearcher -->
<useFilterForSortedQuery>${solr.test.useFilterForSortedQuery}</useFilterForSortedQuery>
</query>
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
</lst>
</requestHandler>
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
<requestHandler name="/select" class="solr.SearchHandler" default="true" />
<requestHandler name="/update" class="solr.UpdateRequestHandler" />
<requestHandler name="/admin/" class="solr.admin.AdminHandlers" />
</config>

View File

@ -947,51 +947,6 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
}
}
/**
 * Disabled deep-paging test (see the Ignore reason).
 * <p>
 * Indexes 1000 docs, fetches a 50 row "gold" response for <code>foo_t:one</code>, and then
 * requests single docs at offsets 10, 20, 30 and 40 while passing the doc id and score of the
 * previously compared doc via <code>CommonParams.PAGEDOC</code> / <code>CommonParams.PAGESCORE</code>.
 * Each single doc result must match the score and doc id found at the same offset in the gold response.
 * </p>
 */
@Ignore("See SOLR-1726")
@Test
public void testDeepPaging() throws Exception {
// index 1000 docs whose foo_t value is the English spelling of the id
for (int i = 0; i < 1000; i++){
assertU(adoc("id", String.valueOf(i), "foo_t", English.intToEnglish(i)));
}
assertU(commit());
SolrQueryRequest goldReq = null;
try {
// NOTE(review): this request asks for "docid" while the paged requests below ask for
// "[docid]" - confirm whether the difference is intentional
goldReq = req("q", "foo_t:one", "rows", "50", "fl", "docid, score");
SolrQueryResponse gold = h.queryAndResponse("standard", goldReq);
ResultContext response = (ResultContext) gold.getValues().get("response");
// sanity check that a plain first page of 10 works
assertQ("page: " + 0 + " failed",
req("q", "foo_t:one", "rows", "10", CommonParams.QT, "standard", "fl", "[docid], score"),
"*[count(//doc)=10]");
//ugh, what a painful way to get the document
// (takes the doc at offset 9 of the gold response, i.e. the last doc of the first page of 10)
DocIterator iterator = response.docs.subset(9, 1).iterator();
int lastDoc = iterator.nextDoc();
float lastScore = iterator.score();
for (int i = 1; i < 5; i++){
//page through some results
DocList subset = response.docs.subset(i * 10, 1);
iterator = subset.iterator();
int compareDoc = iterator.nextDoc();
float compareScore = iterator.score();
assertQ("page: " + i + " failed",
req("q", "foo_t:one", CommonParams.QT, "standard", "fl", "[docid], score",
"start", String.valueOf(i * 10), "rows", "1", //only get one doc, and then compare it to gold
CommonParams.PAGEDOC, String.valueOf(lastDoc), CommonParams.PAGESCORE, String.valueOf(lastScore)),
"*[count(//doc)=1]",
"//float[@name='score'][.='" + compareScore + "']",
"//int[@name='[docid]'][.='" + compareDoc + "']"
);
// the doc just compared becomes the paging reference for the next iteration
lastScore = compareScore;
lastDoc = compareDoc;
}
} finally {
// always release the gold request, even when an assertion above fails
if (goldReq != null ) {
goldReq.close();
}
}
}
// /** this doesn't work, but if it did, this is how we'd test it. */
// public void testOverwriteFalse() {

View File

@ -0,0 +1,759 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.GroupParams;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_PARAM;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_NEXT;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.CursorMark; //jdoc
import org.noggit.ObjectBuilder;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.nio.ByteBuffer;
import org.junit.BeforeClass;
import org.junit.After;
/**
* Tests of deep paging using {@link CursorMark} and {@link #CURSOR_MARK_PARAM}.
*/
public class CursorPagingTest extends SolrTestCaseJ4 {
/** solrconfig.xml file name, shared with other cursor related tests */
public final static String TEST_SOLRCONFIG_NAME = "solrconfig-deeppaging.xml";
/** schema.xml file name, shared with other cursor related tests */
public final static String TEST_SCHEMAXML_NAME = "schema-sorts.xml";
/**
 * Randomly chooses a value for the <code>solr.test.useFilterForSortedQuery</code> system
 * property, then initializes the test core from {@link #TEST_SOLRCONFIG_NAME} and
 * {@link #TEST_SCHEMAXML_NAME}. The property must be set before the core is created.
 */
@BeforeClass
public static void beforeTests() throws Exception {
  final boolean useFilterForSortedQuery = random().nextBoolean();
  System.setProperty("solr.test.useFilterForSortedQuery",
                     String.valueOf(useFilterForSortedQuery));
  initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
}
/** Deletes every document and commits after each test method. */
@After
public void cleanup() throws Exception {
assertU(delQ("*:*"));
assertU(commit());
}
/**
 * Checks that invalid cursor requests are rejected with {@link ErrorCode#BAD_REQUEST}
 * and an error message containing the expected fragment.
 */
public void testBadInputs() throws Exception {
  // randomly run against either an empty index or one seeded with a couple of docs
  final boolean seedSomeDocs = random().nextBoolean();
  if (!seedSomeDocs) {
    assertU(commit());
  } else {
    assertU(adoc("id", "42", "str", "z", "float", "99.99", "int", "42"));
    assertU(adoc("id", "66", "str", "x", "float", "22.00", "int", "-66"));
  }
  assertU(commit());

  // empty, blank, or bogus cursor
  final String[] unparseableCursors = { "", " ", "all the docs please!" };
  for (String badCursor : unparseableCursors) {
    final SolrParams badCursorParams = params("q", "*:*",
                                              "sort", "id desc",
                                              CURSOR_MARK_PARAM, badCursor);
    assertFail(badCursorParams, ErrorCode.BAD_REQUEST, "Unable to parse");
  }

  // no id in sort
  final SolrParams sortWithoutId = params("q", "*:*",
                                          "sort", "score desc",
                                          CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(sortWithoutId, ErrorCode.BAD_REQUEST, "uniqueKey field");

  // _docid_
  final SolrParams sortOnDocid = params("q", "*:*",
                                        "sort", "_docid_ asc, id desc",
                                        CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(sortOnDocid, ErrorCode.BAD_REQUEST, "_docid_");

  // using cursor w/ timeAllowed
  final SolrParams withTimeAllowed = params("q", "*:*",
                                            "sort", "id desc",
                                            CommonParams.TIME_ALLOWED, "1000",
                                            CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(withTimeAllowed, ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);

  // using cursor w/ grouping
  final SolrParams withGrouping = params("q", "*:*",
                                         "sort", "id desc",
                                         GroupParams.GROUP, "true",
                                         GroupParams.GROUP_FIELD, "str",
                                         CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(withGrouping, ErrorCode.BAD_REQUEST, "Grouping");
}
/**
 * Simple static test of some carefully crafted docs.
 * <p>
 * Walks cursors over an empty index, over queries that match nothing, with rows=0, and over
 * several single and multi level sorts (with and without faceting), asserting the exact docs
 * on every page. It then sanity checks {@link #assertFullWalkNoDups} and finally verifies
 * what a cursor returns when docs are deleted or updated between pages.
 * </p>
 */
public void testSimple() throws Exception {
  String cursorMark;
  SolrParams params = null;

  // trivial base case: ensure cursorMark against an empty index doesn't blow up
  cursorMark = CURSOR_MARK_START;
  params = params("q", "*:*",
                  "rows","4",
                  "fl", "id",
                  "sort", "id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==0"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            );
  assertEquals(CURSOR_MARK_START, cursorMark);

  // don't add in order of any field to ensure we aren't inadvertently
  // counting on internal docid ordering
  assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
  assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
  assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
  assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
  assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
  assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
  assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
  assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
  assertU(commit());

  // base case: ensure cursorMark that matches no docs doesn't blow up
  cursorMark = CURSOR_MARK_START;
  params = params("q", "id:9999999",
                  "rows","4",
                  "fl", "id",
                  "sort", "id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==0"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            );
  assertEquals(CURSOR_MARK_START, cursorMark);

  // edge case: ensure rows=0 doesn't blow up and gives back same cursor for next
  cursorMark = CURSOR_MARK_START;
  params = params("q", "*:*",
                  "rows","0",
                  "fl", "id",
                  "sort", "id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            );
  assertEquals(CURSOR_MARK_START, cursorMark);

  // simple id sort (no faceting requested here)
  cursorMark = CURSOR_MARK_START;
  params = params("q", "-int:6",
                  "rows","4",
                  "fl", "id",
                  "sort", "id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==9"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':9},{'id':8},{'id':7},{'id':6}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==9"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':5},{'id':3},{'id':2},{'id':1}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==9"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':0}]"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==9"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ));

  // simple score sort w/some faceting
  cursorMark = CURSOR_MARK_START;
  params = params("q", "float:[0 TO *] int:7 id:6",
                  "rows","4",
                  "fl", "id",
                  "facet", "true",
                  "facet.field", "str",
                  "json.nl", "map",
                  "sort", "score desc, id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==7"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':6},{'id':1},{'id':8},{'id':5}]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==7"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':4},{'id':3},{'id':0}]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==7"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                            ));

  // int sort with dups, id tie breaker ... and some faceting
  cursorMark = CURSOR_MARK_START;
  params = params("q", "-int:2001 -int:4055",
                  "rows","3",
                  "fl", "id",
                  "facet", "true",
                  "facet.field", "str",
                  "json.nl", "map",
                  "sort", "int asc, id asc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':7},{'id':0},{'id':3}]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':4},{'id':1},{'id':6}]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':9},{'id':2}]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                            ));

  // string sort with dups, id tie breaker
  cursorMark = CURSOR_MARK_START;
  params = params("q", "*:*",
                  "rows","6",
                  "fl", "id",
                  "sort", "str asc, id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':6},{'id':4},{'id':3},{'id':1},{'id':8},{'id':5}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':0},{'id':9},{'id':7},{'id':2}]"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ));

  // tri-level sort with more dups of primary than fit on a page
  cursorMark = CURSOR_MARK_START;
  params = params("q", "*:*",
                  "rows","2",
                  "fl", "id",
                  "sort", "float asc, int desc, id desc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':2},{'id':9}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':7},{'id':4}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':3},{'id':8}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':5},{'id':6}]"
                            );
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':1},{'id':0}]"
                            );
  // we've exactly exhausted all the results, but solr had no way of knowing that
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ));

  // trivial base case: rows bigger than number of matches
  cursorMark = CURSOR_MARK_START;
  params = params("q", "id:3 id:7",
                  "rows","111",
                  "fl", "id",
                  "sort", "int asc, id asc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==2"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':7},{'id':3}]"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==2"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ));

  // sanity check our full walk method
  SentinelIntSet ids;
  ids = assertFullWalkNoDups(10, params("q", "*:*",
                                        "rows", "4",
                                        "sort", "id desc"));
  assertEquals(10, ids.size());
  ids = assertFullWalkNoDups(9, params("q", "*:*",
                                       "rows", "1",
                                       "fq", "-id:4",
                                       "sort", "id asc"));
  assertEquals(9, ids.size());
  assertFalse("matched on id:4 unexpectedly", ids.exists(4));
  ids = assertFullWalkNoDups(9, params("q", "*:*",
                                       "rows", "3",
                                       "fq", "-id:6",
                                       "sort", "float desc, id asc, int asc"));
  assertEquals(9, ids.size());
  assertFalse("matched on id:6 unexpectedly", ids.exists(6));
  // this query matches exactly 7 docs (see the numFound==7 checks above), so the walk is
  // capped at 7 to fail on the first unexpected doc, like the other walks in this block
  ids = assertFullWalkNoDups(7, params("q", "float:[0 TO *] int:7 id:6",
                                       "rows", "3",
                                       "sort", "score desc, id desc"));
  assertEquals(7, ids.size());
  assertFalse("matched on id:9 unexpectedly", ids.exists(9));
  assertFalse("matched on id:7 unexpectedly", ids.exists(7));
  assertFalse("matched on id:2 unexpectedly", ids.exists(2));

  // strategically delete/add some docs in the middle of walking the cursor
  cursorMark = CURSOR_MARK_START;
  params = params("q", "*:*",
                  "rows","2",
                  "fl", "id",
                  "sort", "str asc, id asc");
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==10"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':1},{'id':3}]"
                            );
  // delete the last guy we got
  assertU(delI("3"));
  assertU(commit());
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==9"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':4},{'id':6}]"
                            );
  // delete the next guy we expect
  assertU(delI("0"));
  assertU(commit());
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':5},{'id':8}]"
                            );
  // update a doc we've already seen so it repeats
  assertU(adoc("id", "5", "str", "c"));
  assertU(commit());
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':2},{'id':5}]"
                            );
  // update the next doc we expect so it's now in the past
  assertU(adoc("id", "7", "str", "a"));
  assertU(commit());
  cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[{'id':9}]"
                            );
  // no more, so no change to cursorMark, and no new docs
  assertEquals(cursorMark,
               assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                            ,"/response/numFound==8"
                            ,"/response/start==0"
                            ,"/response/docs==[]"
                            ));
}
/**
 * Verifies how a full cursor walk affects the caches: nothing may be inserted into the
 * queryResultCache, while the filterCache must gain entries for the filters used and
 * must see at least one hit.
 */
public void testCacheImpacts() throws Exception {
  // cursor queries can't live in the queryResultCache, but independent filters
  // should still be cached & reused

  // docs are deliberately not added in the order of any field, so that nothing
  // accidentally depends on internal docid ordering
  assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
  assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
  assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
  assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
  assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
  assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
  assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
  assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
  assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
  assertU(commit());

  final Collection<String> sortableFields = getAllFieldNames();

  final SolrInfoMBean filterCache = h.getCore().getInfoRegistry().get("filterCache");
  assertNotNull(filterCache);
  final SolrInfoMBean queryResultCache = h.getCore().getInfoRegistry().get("queryResultCache");
  assertNotNull(queryResultCache);

  // snapshot the cache stats before the walk
  final long qcInsertsBefore = (Long) queryResultCache.getStatistics().get("inserts");
  final long fcInsertsBefore = (Long) filterCache.getStatistics().get("inserts");
  final long fcHitsBefore = (Long) filterCache.getStatistics().get("hits");

  // random page size first, then random sort (same order of random draws as the request needs)
  final String rows = "" + _TestUtil.nextInt(random(), 1, 11);
  final String sort = buildRandomSort(sortableFields);
  final SolrParams walkParams = params("q", "*:*",
                                       "rows", rows,
                                       "fq", "-id:[1 TO 2]",
                                       "fq", "-id:[6 TO 7]",
                                       "fl", "id",
                                       "sort", sort);
  final SentinelIntSet ids = assertFullWalkNoDups(10, walkParams);
  assertEquals(6, ids.size());

  // snapshot the cache stats after the walk
  final long qcInsertsAfter = (Long) queryResultCache.getStatistics().get("inserts");
  final long fcInsertsAfter = (Long) filterCache.getStatistics().get("inserts");
  final long fcHitsAfter = (Long) filterCache.getStatistics().get("hits");

  assertEquals("query cache inserts changed", qcInsertsBefore, qcInsertsAfter);

  // NOTE: use of pure negative filters causes "*:*" to be tracked in filterCache
  final long fcInsertsDelta = fcInsertsAfter - fcInsertsBefore;
  assertEquals("filter cache did not grow correctly", 3, fcInsertsDelta);

  final long fcHitsDelta = fcHitsAfter - fcHitsBefore;
  assertTrue("filter cache did not have any new cache hits", 0 < fcHitsDelta);
}
/**
 * Randomized testing of a non-trivial number of docs using {@link #assertFullWalkNoDups}:
 * first a full walk sorted on every field (both directions) over a small index, then a
 * handful of random sorts and queries over a much larger index.
 */
public void testRandomSortsOnLargeIndex() throws Exception {
  final Collection<String> fieldNames = getAllFieldNames();

  final int numInitialDocs = _TestUtil.nextInt(random(), 100, 200);
  final int numTotalDocs = atLeast(5000);

  // phase 1: a smallish number of documents, and a full walk using a sort
  // on *every* field in the schema
  for (int id = 1; id <= numInitialDocs; id++) {
    assertU(adoc(buildRandomDocument(id)));
  }
  assertU(commit());

  final String[] directions = new String[] {" asc", " desc"};
  for (String field : fieldNames) {
    for (String direction : directions) {
      // every sort needs the id as a tie breaker, unless it already is the id
      String sort = field + direction;
      if (! "id".equals(field)) {
        sort = sort + ", id" + direction;
      }
      final String rows = "" + _TestUtil.nextInt(random(), 13, 50);
      final SentinelIntSet walked = assertFullWalkNoDups(numTotalDocs,
                                                         params("q", "*:*",
                                                                "fl", "id",
                                                                "rows", rows,
                                                                "sort", sort));
      assertEquals(numInitialDocs, walked.size());
    }
  }

  // phase 2: add a lot more docs, and test a handful of randomized sorts
  for (int id = numInitialDocs + 1; id <= numTotalDocs; id++) {
    assertU(adoc(buildRandomDocument(id)));
  }
  assertU(commit());

  final int numRandomSorts = atLeast(5);
  for (int iter = 0; iter < numRandomSorts; iter++) {
    // NOTE: the order of these random draws is kept stable on purpose
    final String sort = buildRandomSort(fieldNames);
    final String rows = "" + _TestUtil.nextInt(random(), 63, 113);
    final String fl;
    if (random().nextBoolean()) {
      fl = "id";
    } else {
      fl = "id,score";
    }
    final boolean matchAll = random().nextBoolean();
    final String q;
    if (matchAll) {
      q = "*:*";
    } else {
      q = buildRandomQuery();
    }

    final SentinelIntSet walked = assertFullWalkNoDups(numTotalDocs,
                                                       params("q", q,
                                                              "fl", fl,
                                                              "rows", rows,
                                                              "sort", sort));
    // the exact count is only known when every doc matches
    if (matchAll) {
      assertEquals(numTotalDocs, walked.size());
    }
  }
}
/**
 * Random coin that comes up true unless a draw between 0 and 30 yields 0.
 * Like usually(), but the odds do not depend on the test multiplier or nightly status.
 */
private static boolean useField() {
  final int draw = _TestUtil.nextInt(random(), 0, 30);
  return draw != 0;
}
/**
 * Returns <code>unlikely</code> only when a random draw between 0 and 9 yields 0
 * (roughly 1 time in 10), and <code>likely</code> the rest of the time.
 */
private static Object skewed(Object likely, Object unlikely) {
  final int draw = _TestUtil.nextInt(random(), 0, 9);
  if (0 == draw) {
    return unlikely;
  }
  return likely;
}
/**
 * Collects the names of the fields in the latest schema, leaving out <code>_version_</code>,
 * and returns them as an unmodifiable collection.
 */
private Collection<String> getAllFieldNames() {
  final ArrayList<String> fieldNames = new ArrayList<String>(37);
  for (String name : h.getCore().getLatestSchema().getFields().keySet()) {
    if (name.equals("_version_")) {
      continue;
    }
    fieldNames.add(name);
  }
  return Collections.<String>unmodifiableCollection(fieldNames);
}
/**
 * Given a set of params, executes a cursor query starting from <code>CURSOR_MARK_START</code>
 * and then keeps walking the results, feeding the <code>CURSOR_MARK_NEXT</code> value of each
 * response into the next request, for as long as a non-zero number of docs is returned.
 * <p>
 * Every id encountered is recorded (ids are read as integers, and -1 is used as the
 * "empty" marker of the returned set). An assertion failure is triggered if any id is seen
 * more than once, if the set grows above <code>maxSize</code>, if a page holds more docs
 * than the <code>rows</code> param allows, or if an empty page changes the cursor.
 * </p>
 *
 * @param maxSize upper bound on the number of distinct ids the walk may return
 * @param params request params; the cursor param is added by this method
 * @return the set of all ids seen during the walk
 */
public SentinelIntSet assertFullWalkNoDups(int maxSize, SolrParams params)
    throws Exception {
  final SentinelIntSet seenIds = new SentinelIntSet(maxSize, -1);
  String cursorMark = CURSOR_MARK_START;
  int pageSize;
  do {
    final String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark));
    final Map rsp = (Map) ObjectBuilder.fromJSON(json);

    // every cursor response must carry a non-null "next" cursor
    assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json,
               rsp.containsKey(CURSOR_MARK_NEXT));
    final String nextCursorMark = (String) rsp.get(CURSOR_MARK_NEXT);
    assertNotNull(CURSOR_MARK_NEXT + " is null", nextCursorMark);

    final Map response = (Map) rsp.get("response");
    final List<Map<Object,Object>> docs = (List) response.get("docs");
    pageSize = docs.size();

    // a page may never exceed the requested number of rows (when rows was specified)
    final Integer rows = params.getInt(CommonParams.ROWS);
    if (null != rows) {
      assertTrue("Too many docs on this page: " + rows + " < " + pageSize,
                 pageSize <= rows);
    }

    // an empty page means we are done, and the cursor must not have moved
    if (0 == pageSize) {
      assertEquals("no more docs, but "+CURSOR_MARK_NEXT+" isn't same",
                   cursorMark, nextCursorMark);
    }

    for (Map<Object,Object> doc : docs) {
      final int id = ((Long) doc.get("id")).intValue();
      assertFalse("walk already seen: " + id, seenIds.exists(id));
      seenIds.put(id);
      assertFalse("id set bigger then max allowed ("+maxSize+"): " + seenIds.size(),
                  maxSize < seenIds.size());
    }

    cursorMark = nextCursorMark;
  } while (0 < pageSize);
  return seenIds;
}
/**
 * Runs the request through {@link #assertJQ} with the given JSON patterns, then
 * verifies that the response carries a non-null {@link #CURSOR_MARK_NEXT} entry
 * and returns that value.
 *
 * @see #assertJQ
 */
public String assertCursor(SolrQueryRequest req, String... tests) throws Exception {
  final String json = assertJQ(req, tests);
  final Map parsed = (Map) ObjectBuilder.fromJSON(json);
  final boolean hasNext = parsed.containsKey(CURSOR_MARK_NEXT);
  assertTrue("response doesn't contain "+CURSOR_MARK_NEXT + ": " + json, hasNext);
  final Object rawNext = parsed.get(CURSOR_MARK_NEXT);
  final String nextMark = (String) rawNext;
  assertNotNull(CURSOR_MARK_NEXT + " is null", nextMark);
  return nextMark;
}
/**
 * Executes a local request that is expected to be rejected, and verifies that
 * the resulting SolrException has the expected error code and that its message
 * contains the expected substring.  Exceptions matching the substring are
 * marked as ignored for the duration of the call.
 */
public void assertFail(SolrParams p, ErrorCode expCode, String expSubstr)
  throws Exception {

  try {
    ignoreException(expSubstr);
    assertJQ(req(p));
    fail("no exception matching expected: " + expCode.code + ": " + expSubstr);
  } catch (SolrException e) {
    assertEquals(expCode.code, e.code());
    final String actualMsg = e.getMessage();
    assertTrue("Expected substr not found: " + expSubstr + " <!< " + actualMsg,
               actualMsg.contains(expSubstr));
  } finally {
    unIgnoreException(expSubstr);
  }
}
/**
 * Builds a document with the given id and randomized values for the "int",
 * "long", "float", "double", "str" and "bin" fields.  Each field is only added
 * when {@link #useField} says so, and each value is chosen via {@link #skewed}
 * between a wide-ranging candidate and a candidate from a small, dense range
 * (which increases the likelihood of duplicate values across docs).
 *
 * @see #buildRandomQuery
 */
public static SolrInputDocument buildRandomDocument(int id) {
  final SolrInputDocument doc = sdoc("id", id);

  // NOTE: the order of the random() calls below is kept deliberately stable,
  // wide candidate first, dense candidate second, then the skewed() choice

  if (useField()) {
    final int wideInt = random().nextInt();
    final int denseInt = _TestUtil.nextInt(random(), 20, 50);
    doc.addField("int", skewed(wideInt, denseInt));
  }
  if (useField()) {
    final long wideLong = random().nextLong();
    final int denseLong = _TestUtil.nextInt(random(), 5000, 5100);
    doc.addField("long", skewed(wideLong, denseLong));
  }
  if (useField()) {
    final float wideFloat = random().nextFloat() * random().nextInt();
    // NOTE(review): presumably nextInt(23) can yield 0, making this Infinity -- confirm intended
    final float denseFloat = 1.0F / random().nextInt(23);
    doc.addField("float", skewed(wideFloat, denseFloat));
  }
  if (useField()) {
    final double wideDouble = random().nextDouble() * random().nextInt();
    final double denseDouble = 1.0D / random().nextInt(37);
    doc.addField("double", skewed(wideDouble, denseDouble));
  }
  if (useField()) {
    final String wideStr = randomUsableUnicodeString();
    final String denseStr = _TestUtil.randomSimpleString(random(),1,1);
    doc.addField("str", skewed(wideStr, denseStr));
  }
  if (useField()) {
    final int usualLength = _TestUtil.nextInt(random(), 20, 50);
    final int numBytes = (int) skewed(usualLength, 2);
    final byte[] randBytes = new byte[numBytes];
    random().nextBytes(randBytes);
    doc.addField("bin", ByteBuffer.wrap(randBytes));
  }
  return doc;
}
/**
 * Produces a random query string over the numeric fields populated by
 * {@link #buildRandomDocument}: either a function query on a single field, or
 * a handful of optional range clauses on three different fields.  The queries
 * are simple, but not so trivial that the scores are completely constant.
 */
public static String buildRandomQuery() {
  final List<String> fields = Arrays.asList("int","long","float","double");
  Collections.shuffle(fields, random());

  final boolean functionQuery = random().nextBoolean();
  if (functionQuery) {
    // simple function query across one field.
    return "{!func}" + fields.get(0);
  }

  // several SHOULD clauses on range queries
  final int low = _TestUtil.nextInt(random(),-2379,2);
  final int high = _TestUtil.nextInt(random(),4,5713);
  final StringBuilder query = new StringBuilder();
  query.append(fields.get(0)).append(":[* TO 0] ");
  query.append(fields.get(1)).append(":[0 TO *] ");
  query.append(fields.get(2)).append(":[").append(low).append(" TO ").append(high).append("]");
  return query.toString();
}
/**
 * We want "realistic" unicode strings beyond simple ascii, but because our
 * updates use XML we need to ensure we don't get anything from the "Specials"
 * unicode block.  When the first candidate contains such a character it is
 * discarded in favor of a simple string.
 *
 * @return a random string containing no characters from the Specials block
 */
private static String randomUsableUnicodeString() {
  String result = _TestUtil.randomRealisticUnicodeString(random());
  // String.matches must match the whole input, and without (?s) the '.' does
  // not match line terminators -- so a candidate containing a newline could
  // slip through even if it also contained a Specials character
  if (result.matches("(?s).*\\p{InSpecials}.*")) {
    // oh well
    result = _TestUtil.randomSimpleString(random());
  }
  return result;
}
/**
 * Given a collection of fieldNames, builds up a random sort string ending with
 * the "id" field for tie breaking.  Any "id" entry in the input is swapped for
 * "score" so that "id" only ever appears as the final clause.  Provided at
 * least 2 field names are supplied, the result has at least 3 clauses.
 *
 * @param fieldNames candidate field names to sort on
 * @return a sort string such as <code>"abs(int) desc, str asc, id asc"</code>
 */
public static String buildRandomSort(final Collection<String> fieldNames) {

  ArrayList<String> shuffledNames = new ArrayList<String>(fieldNames);
  Collections.replaceAll(shuffledNames, "id", "score");
  Collections.shuffle(shuffledNames, random());

  final StringBuilder result = new StringBuilder();
  // atLeast() grows with the test multiplier / nightly runs, so it must be
  // capped at the number of names available or get(i) below would blow up
  final int numClauses = Math.min(atLeast(2), shuffledNames.size());

  for (int i = 0; i < numClauses; i++) {
    String field = shuffledNames.get(i);

    // wrap in a function sometimes
    if ( (!"score".equals(field))
         &&
         (0 == _TestUtil.nextInt(random(), 0, 7)) ) {
      // specific function doesn't matter, just proving that we can handle the concept.
      // but we do have to be careful with non numeric fields
      if (field.startsWith("str") || field.startsWith("bin")) {
        field = "if(exists(" + field + "),47,83)";
      } else {
        field = "abs(" + field + ")";
      }
    }
    result.append(field).append(random().nextBoolean() ? " asc, " : " desc, ");
  }
  result.append("id").append(random().nextBoolean() ? " asc" : " desc");
  return result.toString();
}
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr;
import org.apache.solr.schema.SchemaField;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import org.junit.Before;
import org.junit.After;
/**
 * Tests that cursor requests fail unless the IndexSchema defines a uniqueKey.
 */
public class TestCursorMarkWithoutUniqueKey extends SolrTestCaseJ4 {

  /** solrconfig.xml file name, shared with other cursor related tests */
  public final static String TEST_SOLRCONFIG_NAME = CursorPagingTest.TEST_SOLRCONFIG_NAME;

  /** schema file name; the setup method asserts that it defines no uniqueKey field */
  public final static String TEST_SCHEMAXML_NAME = "schema-minimal.xml";

  /**
   * Randomizes the solr.test.useFilterForSortedQuery system property, creates
   * the core, and sanity checks that the schema really has no uniqueKey.
   */
  @Before
  public void beforeSetupCore() throws Exception {
    System.setProperty("solr.test.useFilterForSortedQuery", Boolean.toString(random().nextBoolean()));
    initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
    SchemaField uniqueKeyField = h.getCore().getLatestSchema().getUniqueKeyField();
    assertNull("This test requires that the schema not have a uniquekey field -- someone violated that in " + TEST_SCHEMAXML_NAME, uniqueKeyField);
  }

  /** Tears down the core created by {@link #beforeSetupCore}. */
  @After
  public void afterDestroyCore() throws Exception {
    deleteCore();
  }

  /**
   * Indexes a single doc and verifies that a request using a cursorMark
   * results in an exception.
   */
  public void test() throws Exception {
    final String expectedError =
      "Cursor functionality is not available unless the IndexSchema defines a uniqueKey field";

    assertU(adoc("fld", "val"));
    assertU(commit());

    ignoreException(expectedError);
    try {
      assertQ(req("q", "*:*", "sort", "fld desc", "cursorMark", CURSOR_MARK_START));
      fail("No exception when querying with a cursorMark with no uniqueKey defined.");
    } catch (Exception expected) {
      // this is the outcome we want: the request was rejected.
      // (fail() throws an AssertionError, which is not caught here)
    } finally {
      // always undo the ignore pattern, even when the fail() path is taken
      unIgnoreException(expectedError);
    }
  }
}

View File

@ -0,0 +1,641 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.solr.CursorPagingTest;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.GroupParams;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_PARAM;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_NEXT;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import org.apache.solr.search.CursorMark; //jdoc
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
/**
* Distributed tests of deep paging using {@link CursorMark} and {@link #CURSOR_MARK_PARAM}.
*
* NOTE: this class reuses some utilities from {@link CursorPagingTest} that assume the same schema and configs.
*
* @see CursorPagingTest
*/
@Slow
public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
public DistribCursorPagingTest() {
System.setProperty("solr.test.useFilterForSortedQuery", Boolean.toString(random().nextBoolean()));
configString = CursorPagingTest.TEST_SOLRCONFIG_NAME;
schemaString = CursorPagingTest.TEST_SCHEMAXML_NAME;
}
/** Returns the solrconfig name held in configString (assigned in the constructor). */
@Override
protected String getCloudSolrConfig() {
  final String cloudConfigName = configString;
  return cloudConfigName;
}
/**
 * Runs the bad input, simple, and randomized large index scenarios in turn,
 * deleting all docs and committing after each one.  If any scenario does not
 * run to completion, printLayoutOnTearDown is switched on.
 */
@Override
public void doTest() throws Exception {
  boolean ranToCompletion = false;
  try {
    // these response keys are marked SKIPVAL
    handle.clear();
    handle.put("QTime", SKIPVAL);
    handle.put("timestamp", SKIPVAL);

    doBadInputTest();
    del("*:*");
    commit();

    doSimpleTest();
    del("*:*");
    commit();

    doRandomSortsOnLargeIndex();
    del("*:*");
    commit();

    ranToCompletion = true;
  } finally {
    if (!ranToCompletion) {
      printLayoutOnTearDown = true;
    }
  }
}
/**
 * Verifies that malformed cursors, and cursor requests combined with
 * unsupported sorts or params, are all rejected with BAD_REQUEST errors.
 */
private void doBadInputTest() throws Exception {
  // sometimes seed some data, othertimes use an empty index
  final boolean seedData = random().nextBoolean();
  if (!seedData) {
    del("*:*");
  } else {
    indexDoc(sdoc("id", "42", "str", "z", "float", "99.99", "int", "42"));
    indexDoc(sdoc("id", "66", "str", "x", "float", "22.00", "int", "-66"));
  }
  commit();

  // empty, blank, or bogus cursor
  final String[] unparsableCursors = { "", " ", "all the docs please!" };
  for (int i = 0; i < unparsableCursors.length; i++) {
    final SolrParams badCursorParams = params("q", "*:*",
                                              "sort", "id desc",
                                              CURSOR_MARK_PARAM, unparsableCursors[i]);
    assertFail(badCursorParams, ErrorCode.BAD_REQUEST, "Unable to parse");
  }

  // no id in sort
  final SolrParams noIdInSort = params("q", "*:*",
                                       "sort", "score desc",
                                       CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(noIdInSort, ErrorCode.BAD_REQUEST, "uniqueKey field");

  // _docid_
  final SolrParams docidSort = params("q", "*:*",
                                      "sort", "_docid_ asc, id desc",
                                      CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(docidSort, ErrorCode.BAD_REQUEST, "_docid_");

  // using cursor w/ timeAllowed
  final SolrParams withTimeAllowed = params("q", "*:*",
                                            "sort", "id desc",
                                            CommonParams.TIME_ALLOWED, "1000",
                                            CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(withTimeAllowed, ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);

  // using cursor w/ grouping
  final SolrParams withGrouping = params("q", "*:*",
                                         "sort", "id desc",
                                         GroupParams.GROUP, "true",
                                         GroupParams.GROUP_FIELD, "str",
                                         CURSOR_MARK_PARAM, CURSOR_MARK_START);
  assertFail(withGrouping, ErrorCode.BAD_REQUEST, "Grouping");
}
/**
 * Walks cursors by hand over a small, fixed set of ten docs, asserting the exact
 * page contents for several sorts, and asserting that an exhausted cursor returns
 * an empty page with an unchanged cursorMark.  Also sanity checks
 * assertFullWalkNoDups, and finishes by deleting/updating docs in the middle of a walk.
 * The statements are strictly order dependent: the expected pages rely on the index
 * state produced by the preceding indexDoc/del/commit calls.
 */
private void doSimpleTest() throws Exception {
String cursorMark = CURSOR_MARK_START;
SolrParams params = null;
QueryResponse rsp = null;
// trivial base case: ensure cursorMark against an empty index doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","4",
"fl", "id",
"sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(0, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// don't add in order of either field to ensure we aren't inadvertently
// counting on internal docid ordering
indexDoc(sdoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
indexDoc(sdoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
indexDoc(sdoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
indexDoc(sdoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
indexDoc(sdoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
indexDoc(sdoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
indexDoc(sdoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
indexDoc(sdoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
commit();
// base case: ensure cursorMark that matches no docs doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "id:9999999",
"rows","4",
"fl", "id",
"sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(0, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// edge case: ensure rows=0 doesn't blow up and gives back same cursor for next
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","0",
"fl", "id",
"sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals(cursorMark, assertHashNextCursorMark(rsp));
// simple id sort
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:6",
"rows","4",
"fl", "id",
"sort", "id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 8, 7, 6);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 3, 2, 1);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 0);
cursorMark = assertHashNextCursorMark(rsp);
// exhausted: empty page, and the cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// NOTE: because field stats and queryNorms can vary amongst shards,
// not all "obvious" score based sorts can be iterated cleanly.
// queries that seem like they should result in an obvious "tie" score
// between two documents (and would tie in a single node case) may actually
// get diff scores for diff docs if they are on diff shards
//
// so here, in this test, we can't assert a hardcoded score ordering -- we trust
// the full walk testing (below)
// int sort with dups, id tie breaker ... and some faceting
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055",
"rows","3",
"fl", "id",
"facet", "true",
"facet.field", "str",
"json.nl", "map",
"sort", "int asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 0, 3);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// next page (the facet assertions are the same on every page)
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 1, 6);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 2);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
cursorMark = assertHashNextCursorMark(rsp);
// exhausted: empty page, and the cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("a", rsp.getFacetField("str").getValues().get(0).getName());
assertEquals(4, rsp.getFacetField("str").getValues().get(0).getCount());
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// string sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","6",
"fl", "id",
"sort", "str asc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 6, 4, 3, 1, 8, 5);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 0, 9, 7, 2);
cursorMark = assertHashNextCursorMark(rsp);
// exhausted: empty page, and the cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// tri-level sort with more dups of primary than fit on a page
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","2",
"fl", "id",
"sort", "float asc, int desc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 9);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 4);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 3, 8);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 6);
cursorMark = assertHashNextCursorMark(rsp);
// next page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 0);
cursorMark = assertHashNextCursorMark(rsp);
// we've exactly exhausted all the results, but solr had no way of knowing that,
// so one more request is needed to get the empty page
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// trivial base case: rows bigger than number of matches
cursorMark = CURSOR_MARK_START;
params = params("q", "id:3 id:7",
"rows","111",
"fl", "id",
"sort", "int asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(2, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 3);
cursorMark = assertHashNextCursorMark(rsp);
// exhausted: empty page, and the cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(2, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// sanity check our full walk method
SentinelIntSet ids;
ids = assertFullWalkNoDups(10, params("q", "*:*",
"rows", "4",
"sort", "id desc"));
assertEquals(10, ids.size());
ids = assertFullWalkNoDups(9, params("q", "*:*",
"rows", "1",
"fq", "-id:4",
"sort", "id asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:4 unexpectedly", ids.exists(4));
ids = assertFullWalkNoDups(9, params("q", "*:*",
"rows", "3",
"fq", "-id:6",
"sort", "float desc, id asc, int asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:6 unexpectedly", ids.exists(6));
ids = assertFullWalkNoDups(9, params("q", "float:[0 TO *] int:7 id:6",
"rows", "3",
"sort", "score desc, id desc"));
assertEquals(7, ids.size());
assertFalse("matched on id:9 unexpectedly", ids.exists(9));
assertFalse("matched on id:7 unexpectedly", ids.exists(7));
assertFalse("matched on id:2 unexpectedly", ids.exists(2));
// strategically delete/add some docs in the middle of walking the cursor
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
"rows","2",
"fl", "id",
"sort", "str asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 3);
cursorMark = assertHashNextCursorMark(rsp);
// delete the last guy we got
del("id:3");
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(9, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 6);
cursorMark = assertHashNextCursorMark(rsp);
// delete the next guy we expect
del("id:0");
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 5, 8);
cursorMark = assertHashNextCursorMark(rsp);
// update a doc we've already seen so it repeats
indexDoc(sdoc("id", "5", "str", "c"));
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 5);
cursorMark = assertHashNextCursorMark(rsp);
// update the next doc we expect so it's now in the past
indexDoc(sdoc("id", "7", "str", "a"));
commit();
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertDocList(rsp, 9);
cursorMark = assertHashNextCursorMark(rsp);
// exhausted: empty page, and the cursor must not move
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
}
/**
 * Randomized testing of a non-trivial number of docs using assertFullWalkNoDups:
 * first a full walk sorted on every single field (in both directions) over a
 * smallish index, then a handful of random multi-level sorts over a much
 * larger index.
 */
public void doRandomSortsOnLargeIndex() throws Exception {
  final Collection<String> fieldNames = getAllFieldNames();

  final int numInitialDocs = _TestUtil.nextInt(random(),100,200);
  final int numTotalDocs = atLeast(5000);

  // start with a smallish number of documents, and test that we can do a full walk using a
  // sort on *every* field in the schema...
  int nextId = 1;
  while (nextId <= numInitialDocs) {
    indexDoc(CursorPagingTest.buildRandomDocument(nextId));
    nextId++;
  }
  commit();

  final String[] directions = new String[] {" asc", " desc"};
  for (String f : fieldNames) {
    for (String direction : directions) {
      // every sort other than a plain id sort needs id added as the tie breaker
      String sort = f + direction;
      if (!"id".equals(f)) {
        sort = sort + ", id" + direction;
      }
      final String rows = "" + _TestUtil.nextInt(random(),13,50);
      final SolrParams walkParams = params("q", "*:*",
                                           "fl","id",
                                           "rows",rows,
                                           "sort",sort);
      final SentinelIntSet seen = assertFullWalkNoDups(numInitialDocs, walkParams);
      assertEquals(numInitialDocs, seen.size());
    }
  }

  // now add a lot more docs, and test a handful of randomized multi-level sorts
  // (nextId picks up right after the initial docs)
  while (nextId <= numTotalDocs) {
    indexDoc(CursorPagingTest.buildRandomDocument(nextId));
    nextId++;
  }
  commit();

  final int numRandomSorts = atLeast(5);
  for (int iter = 0; iter < numRandomSorts; iter++) {
    final String sort = CursorPagingTest.buildRandomSort(fieldNames);
    final String rows = "" + _TestUtil.nextInt(random(),63,113);
    final String fl;
    if (random().nextBoolean()) {
      fl = "id";
    } else {
      fl = "id,score";
    }
    final boolean matchAll = random().nextBoolean();
    final String q;
    if (matchAll) {
      q = "*:*";
    } else {
      q = CursorPagingTest.buildRandomQuery();
    }

    final SentinelIntSet seen = assertFullWalkNoDups(numTotalDocs,
                                                     params("q", q,
                                                            "fl",fl,
                                                            "rows",rows,
                                                            "sort",sort));
    // only a match-all query lets us know exactly how many ids to expect
    if (matchAll) {
      assertEquals(numTotalDocs, seen.size());
    }
  }
}
/**
 * Sends a LukeRequest (with the schema included) to "/admin/luke" via the
 * control client, and returns the names listed under schema/fields in the
 * response -- excluding _version_ -- as an unmodifiable collection.
 */
private Collection<String> getAllFieldNames() throws SolrServerException, IOException {
  final LukeRequest lukeReq = new LukeRequest("/admin/luke");
  lukeReq.setShowSchema(true);

  final NamedList<Object> lukeRsp = controlClient.request(lukeReq);
  final NamedList schemaInfo = (NamedList) lukeRsp.get("schema");
  final NamedList<Object> fields = (NamedList) schemaInfo.get("fields");

  final ArrayList<String> names = new ArrayList<String>(fields.size());
  for (Map.Entry<String,Object> item : fields) {
    final String f = item.getKey();
    if (f.equals("_version_")) {
      continue;
    }
    names.add(f);
  }
  return Collections.<String>unmodifiableCollection(names);
}
/**
 * Executes a request that is expected to be rejected, and verifies that the
 * resulting SolrException has the expected error code and that its message
 * contains the expected substring.  Exceptions matching the substring are
 * marked as ignored for the duration of the call.
 */
public void assertFail(SolrParams p, ErrorCode expCode, String expSubstr)
  throws Exception {

  try {
    ignoreException(expSubstr);
    query(p);
    fail("no exception matching expected: " + expCode.code + ": " + expSubstr);
  } catch (SolrException e) {
    assertEquals(expCode.code, e.code());
    final String actualMsg = e.getMessage();
    assertTrue("Expected substr not found: " + expSubstr + " <!< " + actualMsg,
               actualMsg.contains(expSubstr));
  } finally {
    unIgnoreException(expSubstr);
  }
}
/**
 * Asserts that the numFound reported by the doc list of the given
 * QueryResponse (as returned by SolrServer.query) equals the expected value.
 * @see SolrServer#query
 */
private void assertNumFound(int expected, QueryResponse rsp) {
  final SolrDocumentList docList = extractDocList(rsp);
  assertEquals(expected, docList.getNumFound());
}
/**
 * Asserts that the start offset reported by the doc list of the given
 * QueryResponse (as returned by SolrServer.query) equals the expected value.
 * @see SolrServer#query
 */
private void assertStartsAt(int expected, QueryResponse rsp) {
  final SolrDocumentList docList = extractDocList(rsp);
  assertEquals(expected, docList.getStart());
}
/**
 * Asserts that the doc list of the given QueryResponse (as returned by
 * SolrServer.query) holds exactly as many docs as ids were supplied, and that
 * the "id" of each doc equals the expected id at the same position.
 * @see SolrServer#query
 */
private void assertDocList(QueryResponse rsp, Object... ids) {
  final SolrDocumentList docList = extractDocList(rsp);
  assertEquals("Wrong number of docs in response", ids.length, docList.size());
  for (int pos = 0; pos < ids.length; pos++) {
    final Object actualId = docList.get(pos).get("id");
    assertEquals(rsp.toString(), ids[pos], actualId);
  }
}
/**
 * Asserts that the given QueryResponse (as returned by SolrServer.query)
 * provides a non-null next cursor mark, and returns that value.
 * @see SolrServer#query
 */
private String assertHashNextCursorMark(QueryResponse rsp) {
  final String nextMark = rsp.getNextCursorMark();
  final String failureMsg = CURSOR_MARK_NEXT+" is null/missing";
  assertNotNull(failureMsg, nextMark);
  return nextMark;
}
/** Returns the results of the response, after asserting that they are not null. */
private SolrDocumentList extractDocList(QueryResponse rsp) {
  final SolrDocumentList results = rsp.getResults();
  assertNotNull("docList is null", results);
  return results;
}
/**
 * <p>
 * Given a set of params, executes a cursor query starting at {@link #CURSOR_MARK_START}
 * and then keeps re-issuing the request with each returned next cursor mark for as
 * long as a page comes back with a non-0 number of docs.  The id (must be a positive
 * int) of every doc seen is recorded, and an assertion failure is triggered if any id
 * is encountered more than once, or if the set of ids grows above maxSize.
 * </p>
 *
 * <p>
 * Note that this method explicitly uses the "cloudClient" for executing the queries,
 * instead of relying on the test infrastructure to execute the queries redundantly
 * against both the cloud client as well as a control client.  This is because term stat
 * differences in a sharded setup can result in different scores for documents compared
 * to the control index -- which can affect the sorting in some cases and cause false
 * negatives in the response comparisons (even if we don't include "score" in the "fl")
 * </p>
 *
 * @return the set of all ids encountered during the walk
 */
public SentinelIntSet assertFullWalkNoDups(int maxSize, SolrParams params) throws Exception {
  final SentinelIntSet seenIds = new SentinelIntSet(maxSize, -1);
  String currentMark = CURSOR_MARK_START;
  int pageSize;
  do {
    final QueryResponse rsp = cloudClient.query(p(params, CURSOR_MARK_PARAM, currentMark));
    final String nextMark = assertHashNextCursorMark(rsp);
    final SolrDocumentList docs = extractDocList(rsp);
    pageSize = docs.size();

    // a page may never hold more docs than the caller asked for
    final Integer rowsParam = params.getInt(CommonParams.ROWS);
    if (null != rowsParam) {
      final int rows = rowsParam;
      assertTrue("Too many docs on this page: " + rows + " < " + pageSize,
                 pageSize <= rows);
    }

    // an empty page must hand back the cursor it was given
    if (0 == pageSize) {
      assertEquals("no more docs, but "+CURSOR_MARK_NEXT+" isn't same",
                   currentMark, nextMark);
    }

    for (SolrDocument doc : docs) {
      final Integer rawId = (Integer) doc.get("id");
      final int id = rawId.intValue();
      assertFalse("walk already seen: " + id, seenIds.exists(id));
      seenIds.put(id);
      assertFalse("id set bigger then max allowed ("+maxSize+"): " + seenIds.size(),
                  maxSize < seenIds.size());
    }
    currentMark = nextMark;
  } while (0 < pageSize);
  return seenIds;
}
/**
 * Builds params from the <code>other</code> strings and combines them with
 * <code>params</code> via SolrParams.wrapDefaults, passing <code>params</code>
 * as the first argument and the newly built params as the second.
 */
private SolrParams p(SolrParams params, String... other) {
  return SolrParams.wrapDefaults(params, params(other));
}
}

View File

@ -39,10 +39,7 @@ public class SortableBinaryField extends BinaryField {
/**
 * Schema validation hook for fields of this type.  As shown here, it throws an
 * IllegalStateException when the field has doc values, is not multiValued, is
 * not required, and has no default value.
 *
 * NOTE(review): this text comes from a diff hunk, and the trailing NOOP comment
 * contradicts the check above it -- presumably the check was removed by the
 * change and only the NOOP comment remains; confirm against the real file.
 */
@Override
public void checkSchemaField(final SchemaField field) {
if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
throw new IllegalStateException(
"Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
}
// NOOP, It's Aaaaaall Good.
}
@Override

View File

@ -0,0 +1,261 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.CursorPagingTest;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import org.junit.BeforeClass;
/**
* Primarily a test of parsing and serialization of the CursorMark values.
*
* NOTE: this class reuses some utilities from {@link CursorPagingTest} that assume the same schema and configs.
*
* @see CursorPagingTest
*/
public class CursorMarkTest extends SolrTestCaseJ4 {
/**
 * Randomizes the solr.test.useFilterForSortedQuery system property and creates
 * a core using the same solrconfig and schema as {@link CursorPagingTest}.
 */
@BeforeClass
public static void beforeTests() throws Exception {
  final String useFilter = Boolean.toString(random().nextBoolean());
  System.setProperty("solr.test.useFilterForSortedQuery", useFilter);
  initCore(CursorPagingTest.TEST_SOLRCONFIG_NAME, CursorPagingTest.TEST_SCHEMAXML_NAME);
}
/**
 * Checks that {@link CursorMark#createNext} yields a mark that carries the supplied
 * sort values and the original SortSpec, and that handing it a list of sort values
 * built for a sort with a different number of clauses is rejected.
 */
public void testNextCursorMark() {
  final Collection<String> allFieldNames = getAllFieldNames();
  final SolrQueryRequest req = req();
  final IndexSchema schema = req.getSchema();

  final String randomSortString = CursorPagingTest.buildRandomSort(allFieldNames);
  final SortSpec ss = QueryParsing.parseSortSpec(randomSortString, req);

  final CursorMark previous = new CursorMark(schema, ss);
  previous.parseSerializedTotem(CURSOR_MARK_START);

  final List<Object> nextValues = Arrays.<Object>asList(buildRandomSortObjects(ss));
  final CursorMark next = previous.createNext(nextValues);
  assertEquals("next values not correct", nextValues, next.getSortValues());
  assertEquals("next SortSpec not correct", ss, next.getSortSpec());

  // append to our random sort string so we know it has wrong num clauses
  // (built outside the try block so that only createNext is being guarded)
  final SortSpec otherSort = QueryParsing.parseSortSpec(randomSortString+",id asc", req);
  final List<Object> wrongSizeValues =
    Arrays.<Object>asList(buildRandomSortObjects(otherSort));

  // fail() reports by throwing AssertionError, so it must not be called inside a
  // try block that catches AssertionError: the catch would swallow the failure and
  // the test could never fail.  Record the outcome and assert on it afterwards.
  // NOTE(review): this expects createNext to signal the size mismatch with an
  // AssertionError, which presumably requires JVM assertions to be enabled -- confirm.
  boolean rejected = false;
  try {
    previous.createNext(wrongSizeValues);
  } catch (AssertionError e) {
    // expected: we're happy
    rejected = true;
  }
  assertTrue("didn't fail on next with incorrect num of sortvalues", rejected);
}
/**
 * Verifies that a {@link CursorMark} cannot be constructed for sorts that omit the
 * uniqueKey field or that include <code>_docid_</code>.
 */
public void testInvalidUsage() {
  final SolrQueryRequest req = req();
  final IndexSchema schema = req.getSchema();

  assertCursorMarkRejected(req, schema, "str desc, score desc", "uniqueKey",
                           "no failure from sort that doesn't include uniqueKey field");

  for (final String dir : new String[] { "asc", "desc" }) {
    assertCursorMarkRejected(req, schema, "score " + dir, "uniqueKey",
                             "no failure from score only sort: " + dir);
    assertCursorMarkRejected(req, schema, "_docid_ "+dir+", id desc", "_docid_",
                             "no failure from sort that includes _docid_: " + dir);
  }
}

/**
 * Parses the sort string, tries to build a {@link CursorMark} from it, and asserts
 * that this fails with a BAD_REQUEST {@link SolrException} whose message contains
 * the expected text somewhere after its first character.
 *
 * @param req request used to parse the sort string
 * @param schema schema handed to the CursorMark constructor
 * @param sort the sort string expected to be rejected
 * @param expectedInMsg text that must appear in the exception message
 * @param failMsg failure message used when no exception is thrown
 */
private static void assertCursorMarkRejected(final SolrQueryRequest req,
                                             final IndexSchema schema,
                                             final String sort,
                                             final String expectedInMsg,
                                             final String failMsg) {
  try {
    final SortSpec spec = QueryParsing.parseSortSpec(sort, req);
    new CursorMark(schema, spec);
    fail(failMsg);
  } catch (SolrException e) {
    assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
    assertTrue(0 < e.getMessage().indexOf(expectedInMsg));
  }
}
/**
 * Verifies that unparseable totem strings, and a totem serialized for a sort with a
 * different number of clauses, are rejected with a BAD_REQUEST {@link SolrException}.
 */
public void testGarbageParsing() {
  final SolrQueryRequest req = req();
  final IndexSchema schema = req.getSchema();
  final SortSpec ss = QueryParsing.parseSortSpec("str asc, float desc, id asc", req);
  final CursorMark totem = new CursorMark(schema, ss);

  // each row: { totem string to parse, message used if parsing unexpectedly succeeds }
  final String[][] unparseable = new String[][] {
    // totem string that isn't even valid base64
    { "all the documents please", "didn't fail on invalid base64 totem" },
    // empty totem string
    { "", "didn't fail on empty totem" },
    // whitespace-only totem string
    { " ", "didn't fail on whitespace-only totem" },
  };
  for (final String[] garbage : unparseable) {
    try {
      totem.parseSerializedTotem(garbage[0]);
      fail(garbage[1]);
    } catch (SolrException e) {
      assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
      assertTrue(e.getMessage().contains("Unable to parse 'cursorMark'"));
    }
  }

  // totem string from sort with diff num clauses
  try {
    final SortSpec otherSort = QueryParsing.parseSortSpec("double desc, id asc", req);
    final CursorMark otherTotem = new CursorMark(schema, otherSort);
    final Object[] otherValues = buildRandomSortObjects(otherSort);
    otherTotem.setSortValues(Arrays.<Object>asList(otherValues));

    final String serialized = otherTotem.getSerializedTotem();
    totem.parseSerializedTotem(serialized);
    fail("didn't fail on totem from incorrect sort (num clauses)");
  } catch (SolrException e) {
    assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
    assertTrue(e.getMessage().contains("wrong size"));
  }
}
/**
 * For many random sorts, and many random sets of sort values per sort, serializes a
 * totem from one {@link CursorMark} and parses it with another built from the same
 * SortSpec, asserting that the same values come back out.
 */
public void testRoundTripParsing() {
  final Collection<String> fieldNames = getAllFieldNames();
  final SolrQueryRequest req = req();
  final IndexSchema schema = req.getSchema();

  final int sortCount = atLeast(50);
  final int valuesPerSort = atLeast(10);

  for (int sortIdx = 0; sortIdx < sortCount; sortIdx++) {
    final String sortString = CursorPagingTest.buildRandomSort(fieldNames);
    final SortSpec spec = QueryParsing.parseSortSpec(sortString, req);
    final CursorMark source = new CursorMark(schema, spec);
    final CursorMark target = new CursorMark(schema, spec);

    // the start mark must parse and re-serialize to itself, whatever the sort is
    source.parseSerializedTotem(CURSOR_MARK_START);
    assertEquals(CURSOR_MARK_START, source.getSerializedTotem());

    // ... and it carries no sort values; null values serialize back to the start mark
    assertNull(source.getSortValues());
    target.setSortValues(null);
    assertEquals(CURSOR_MARK_START, target.getSerializedTotem());

    for (int valIdx = 0; valIdx < valuesPerSort; valIdx++) {
      final Object[] expected = buildRandomSortObjects(spec);
      source.setSortValues(Arrays.<Object>asList(expected));

      final String serialized = source.getSerializedTotem();
      target.parseSerializedTotem(serialized);

      final List<Object> actual = target.getSortValues();
      assertNotNull(actual);
      assertArrayEquals(expected, actual.toArray());
    }
  }
}
/**
 * Builds one random sort value per clause of the given SortSpec, in clause order.
 * <p>
 * A clause with no SchemaField (score or function) gets a random Float.  A clause
 * backed by a field occasionally gets <code>null</code> (emulating a doc with no
 * value), and otherwise gets a random value chosen according to the field name.
 * </p>
 *
 * @param ss the sort to build values for; its schema field list must not be null
 * @return an array with exactly one entry per sort clause
 */
private static Object[] buildRandomSortObjects(SortSpec ss) {
  List<SchemaField> fields = ss.getSchemaFields();
  assertNotNull(fields);
  Object[] results = new Object[fields.size()];
  for (int i = 0; i < results.length; i++) {
    SchemaField sf = fields.get(i);
    if (null == sf) {
      // score or function
      // (no 'break' here: leaving the loop would leave every later clause null)
      results[i] = Float.valueOf(random().nextFloat() * random().nextInt());
    } else if (0 == _TestUtil.nextInt(random(), 0, 7)) {
      // emulate missing value for doc
      results[i] = null;
    } else {
      final String fieldName = sf.getName();
      assertNotNull(fieldName);

      // Note: In some cases we build a human readable version of the sort value and then
      // unmarshall it into the raw, real, sort values that are expected by the FieldTypes.
      // In other cases we just build the raw value to begin with because it's easier

      Object val = null;
      if (fieldName.equals("id")) {
        val = sf.getType().unmarshalSortValue(_TestUtil.randomSimpleString(random()));
      } else if (fieldName.startsWith("str")) {
        val = sf.getType().unmarshalSortValue(_TestUtil.randomRealisticUnicodeString(random()));
      } else if (fieldName.startsWith("bin")) {
        byte[] randBytes = new byte[_TestUtil.nextInt(random(), 1, 50)];
        random().nextBytes(randBytes);
        val = new BytesRef(randBytes);
      } else if (fieldName.startsWith("int")) {
        val = Integer.valueOf(random().nextInt());
      } else if (fieldName.startsWith("long")) {
        val = Long.valueOf(random().nextLong());
      } else if (fieldName.startsWith("float")) {
        // must fall through to the assignment below; a 'break' here would both drop
        // this value and abandon the remaining clauses
        val = Float.valueOf(random().nextFloat() * random().nextInt());
      } else if (fieldName.startsWith("double")) {
        val = Double.valueOf(random().nextDouble() * random().nextInt());
      } else {
        fail("fell through the rabbit hole, new field in schema? = " + fieldName);
      }

      results[i] = val;
    }
  }
  return results;
}
/**
 * Collects the names of the fields in the latest schema of the test core,
 * leaving out <code>_version_</code>.
 *
 * @return a read-only view of the collected field names
 */
private Collection<String> getAllFieldNames() {
  final List<String> fieldNames = new ArrayList<String>(37);
  for (final String name : h.getCore().getLatestSchema().getFields().keySet()) {
    if (name.equals("_version_")) {
      continue;
    }
    fieldNames.add(name);
  }
  return Collections.<String>unmodifiableCollection(fieldNames);
}
}

View File

@ -22,6 +22,7 @@ import org.apache.solr.client.solrj.beans.DocumentObjectBinder;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.params.CursorMarkParams;
import java.util.*;
@ -43,6 +44,7 @@ public class QueryResponse extends SolrResponseBase
private NamedList<NamedList<Object>> _spellInfo = null;
private NamedList<Object> _statsInfo = null;
private NamedList<NamedList<Number>> _termsInfo = null;
private String _cursorMarkNext = null;
// Grouping response
private NamedList<Object> _groupedInfo = null;
@ -133,6 +135,9 @@ public class QueryResponse extends SolrResponseBase
_termsInfo = (NamedList<NamedList<Number>>) res.getVal( i );
extractTermsInfo( _termsInfo );
}
else if ( CursorMarkParams.CURSOR_MARK_NEXT.equals( n ) ) {
_cursorMarkNext = (String) res.getVal( i );
}
}
if(_facetInfo != null) extractFacetInfo( _facetInfo );
}
@ -487,6 +492,10 @@ public class QueryResponse extends SolrResponseBase
public Map<String, FieldStatsInfo> getFieldStatsInfo() {
return _fieldStatsInfo;
}
/**
 * Returns the value found under the {@link CursorMarkParams#CURSOR_MARK_NEXT} key when
 * the response was parsed, or <code>null</code> if no such entry was seen.
 */
public String getNextCursorMark() {
  return this._cursorMarkNext;
}
}

View File

@ -77,14 +77,6 @@ public interface CommonParams {
public static final String PING = "ping";
// SOLR-4228 end
//Issue 1726 start
/** score of the last document of the previous page */
public static final String PAGESCORE ="pageScore";
/** docid of the last document of the previous page */
public static final String PAGEDOC ="pageDoc";
//Issue 1726 end
/** stylesheet to apply to XML results */
public static final String XSL ="xsl";

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.params;
/**
 * Request parameter names, response keys and special values used for cursor based
 * requests across large sorted result sets.
 */
public interface CursorMarkParams {

  /**
   * Name of the request param a client sends to ask for a cursor based search.
   * Its value must be either {@link #CURSOR_MARK_START}, meaning the first page of
   * results, or a value handed back by an earlier search under the
   * {@link #CURSOR_MARK_NEXT} key.
   */
  String CURSOR_MARK_PARAM = "cursorMark";

  /**
   * Key in the Solr response under which the client finds the "next"
   * {@link #CURSOR_MARK_PARAM} value to send in order to continue pagination.
   */
  String CURSOR_MARK_NEXT = "nextCursorMark";

  /**
   * Special {@link #CURSOR_MARK_PARAM} value meaning that cursor functionality should
   * be used, and a new cursor value computed after the last result, but that the
   * "first page" of results is what is currently being requested.
   */
  String CURSOR_MARK_START = "*";

}

View File

@ -521,6 +521,14 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
return query(true, q);
}
/**
 * Convenience form of {@link #query(boolean, SolrParams)} that always requests
 * that distributed params be set.
 *
 * @param params the query params to send
 * @return the QueryResponse from {@link #queryServer}
 */
protected QueryResponse query(SolrParams params) throws Exception {
  final boolean setDistribParams = true;
  return query(setDistribParams, params);
}
/**
* Returns the QueryResponse from {@link #queryServer}
*/
@ -531,6 +539,16 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
for (int i = 0; i < q.length; i += 2) {
params.add(q[i].toString(), q[i + 1].toString());
}
return query(setDistribParams, params);
}
/**
* Returns the QueryResponse from {@link #queryServer}
*/
protected QueryResponse query(boolean setDistribParams, SolrParams p) throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams(p);
// TODO: look into why passing true causes fails
params.set("distrib", "false");
final QueryResponse controlRsp = controlClient.query(params);

View File

@ -673,9 +673,10 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
* Validates a query matches some JSON test expressions using the default double delta tolerance.
* @see JSONTestUtil#DEFAULT_DELTA
* @see #assertJQ(SolrQueryRequest,double,String...)
* @return The request response as a JSON String if all test patterns pass
*/
public static void assertJQ(SolrQueryRequest req, String... tests) throws Exception {
assertJQ(req, JSONTestUtil.DEFAULT_DELTA, tests);
public static String assertJQ(SolrQueryRequest req, String... tests) throws Exception {
return assertJQ(req, JSONTestUtil.DEFAULT_DELTA, tests);
}
/**
* Validates a query matches some JSON test expressions and closes the
@ -690,8 +691,9 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
* @param req Solr request to execute
* @param delta tolerance allowed in comparing float/double values
* @param tests JSON path expression + '==' + expected value
* @return The request response as a JSON String if all test patterns pass
*/
public static void assertJQ(SolrQueryRequest req, double delta, String... tests) throws Exception {
public static String assertJQ(SolrQueryRequest req, double delta, String... tests) throws Exception {
SolrParams params = null;
try {
params = req.getParams();
@ -739,6 +741,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
}
}
}
return response;
} finally {
// restore the params
if (params != null && params != req.getParams()) req.setParams(params);