SOLR-5768: Add a distrib.singlePass parameter to make EXECUTE_QUERY phase fetch all fields and skip GET_FIELDS

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1607360 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2014-07-02 13:22:14 +00:00
parent 81e25a66c6
commit e15ff59929
5 changed files with 117 additions and 38 deletions

View File

@ -127,6 +127,9 @@ New Features
* SOLR-6044: The 'clusterstatus' API should return live_nodes as well. (shalin)
* SOLR-5768: Add a distrib.singlePass parameter to make EXECUTE_QUERY phase fetch all fields
and skip GET_FIELDS. (Gregg Donovan, shalin)
Bug Fixes
----------------------

View File

@ -30,6 +30,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.AtomicReaderContext;
@ -111,7 +112,7 @@ import java.util.Comparator;
public class QueryComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "query";
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
@ -148,7 +149,7 @@ public class QueryComponent extends SearchComponent
Query q = parser.getQuery();
if (q == null) {
// normalize a null query to a query that matches nothing
q = new BooleanQuery();
q = new BooleanQuery();
}
rb.setQuery( q );
@ -174,7 +175,7 @@ public class QueryComponent extends SearchComponent
rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser);
final String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
if (null != cursorStr) {
final CursorMark cursorMark = new CursorMark(rb.req.getSchema(),
@ -218,10 +219,10 @@ public class QueryComponent extends SearchComponent
if (null != rb.getCursorMark()) {
// It's hard to imagine, conceptually, what it would mean to combine
// grouping with a cursor - so for now we just don't allow the combination at all
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not use Grouping with " +
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not use Grouping with " +
CursorMarkParams.CURSOR_MARK_PARAM);
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
SolrIndexSearcher searcher = rb.req.getSearcher();
GroupingSpecification groupingSpec = new GroupingSpecification();
@ -290,7 +291,7 @@ public class QueryComponent extends SearchComponent
long timeAllowed = (long)params.getInt( CommonParams.TIME_ALLOWED, -1 );
if (null != rb.getCursorMark() && 0 < timeAllowed) {
// fundementally incompatible
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
}
@ -322,7 +323,7 @@ public class QueryComponent extends SearchComponent
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res);
ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = null; // anything?
@ -493,7 +494,7 @@ public class QueryComponent extends SearchComponent
if ( ! rb.req.getParams().getBool(ShardParams.IS_SHARD,false) ) {
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.getNextCursorMark().getSerializedTotem());
}
}
@ -603,7 +604,7 @@ public class QueryComponent extends SearchComponent
comparator.setScorer(new FakeScorer(doc, score));
comparator.copy(0, doc);
Object val = comparator.value(0);
if (null != ft) val = ft.marshalSortValue(val);
if (null != ft) val = ft.marshalSortValue(val);
vals[position] = val;
}
@ -624,7 +625,7 @@ public class QueryComponent extends SearchComponent
}
}
@Override
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (rb.grouping()) {
return groupedDistributedProcess(rb);
@ -784,7 +785,7 @@ public class QueryComponent extends SearchComponent
rb.rsp.add("response", rb._responseDocs);
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.getNextCursorMark().getSerializedTotem());
}
}
@ -802,8 +803,11 @@ public class QueryComponent extends SearchComponent
// one-pass algorithm if only id and score fields are requested, but not if fl=score since that's the same as fl=*,score
ReturnFields fields = rb.rsp.getReturnFields();
if(fields != null && fields.wantsField(keyFieldName)
&& fields.getRequestedFieldNames() != null && Arrays.asList(keyFieldName, "score").containsAll(fields.getRequestedFieldNames())) {
// distrib.singlePass=true forces a one-pass query regardless of requested fields
boolean distribSinglePass = rb.req.getParams().getBool(ShardParams.DISTRIB_SINGLE_PASS, false);
if(distribSinglePass || (fields != null && fields.wantsField(keyFieldName)
&& fields.getRequestedFieldNames() != null && Arrays.asList(keyFieldName, "score").containsAll(fields.getRequestedFieldNames()))) {
sreq.purpose |= ShardRequest.PURPOSE_GET_FIELDS;
rb.onePassDistributedQuery = true;
}
@ -833,21 +837,53 @@ public class QueryComponent extends SearchComponent
sreq.params.set(CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
}
// in this first phase, request only the unique key field
// and any fields needed for merging.
sreq.params.set(ResponseBuilder.FIELD_SORT_VALUES,"true");
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
sreq.params.set(CommonParams.FL, keyFieldName + ",score");
boolean shardQueryIncludeScore = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0 || rb.getSortSpec().includesScore();
if (distribSinglePass) {
String fl = rb.req.getParams().get(CommonParams.FL);
if (fl == null) {
if (fields.getRequestedFieldNames() == null && fields.wantsAllFields()) {
fl = "*";
} else {
fl = "";
for (String s : fields.getRequestedFieldNames()) {
fl += s + ",";
}
}
}
if (!fields.wantsField(keyFieldName)) {
// the user has not requested the unique key but
// we still need to add it otherwise mergeIds can't work
if (fl.endsWith(",")) {
fl += keyFieldName;
} else {
fl += "," + keyFieldName;
}
}
sreq.params.set(CommonParams.FL, updateFl(fl, shardQueryIncludeScore));
} else {
sreq.params.set(CommonParams.FL, keyFieldName);
// in this first phase, request only the unique key field and any fields needed for merging.
if (shardQueryIncludeScore) {
sreq.params.set(CommonParams.FL, keyFieldName + ",score");
} else {
sreq.params.set(CommonParams.FL, keyFieldName);
}
}
rb.addRequest(this, sreq);
}
String updateFl(String originalFields, boolean includeScoreIfMissing) {
if (includeScoreIfMissing && !scorePattern.matcher(originalFields).find()) {
return originalFields + ",score";
} else {
return originalFields;
}
}
private static final Pattern scorePattern = Pattern.compile("\\bscore\\b");
private void mergeIds(ResponseBuilder rb, ShardRequest sreq) {

View File

@ -18,10 +18,14 @@ package org.apache.solr.handler.component;
*/
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.junit.BeforeClass;
import java.nio.ByteBuffer;
import java.util.Map;
/**
* Test for QueryComponent's distributed querying optimization.
@ -48,32 +52,67 @@ public class DistributedQueryComponentOptimizationTest extends BaseDistributedSe
public void doTest() throws Exception {
del("*:*");
index(id, "1", "text", "a", "payload", ByteBuffer.wrap(new byte[] { 0x12, 0x62, 0x15 }), // 2
index(id, "1", "text", "a", "test_sS", "21", "payload", ByteBuffer.wrap(new byte[] { 0x12, 0x62, 0x15 }), // 2
// quick check to prove "*" dynamicField hasn't been broken by somebody mucking with schema
"asdfasdf_field_should_match_catchall_dynamic_field_adsfasdf", "value");
index(id, "2", "text", "b", "payload", ByteBuffer.wrap(new byte[] { 0x25, 0x21, 0x16 })); // 5
index(id, "3", "text", "a", "payload", ByteBuffer.wrap(new byte[] { 0x35, 0x32, 0x58 })); // 8
index(id, "4", "text", "b", "payload", ByteBuffer.wrap(new byte[] { 0x25, 0x21, 0x15 })); // 4
index(id, "5", "text", "a", "payload", ByteBuffer.wrap(new byte[] { 0x35, 0x35, 0x10, 0x00 })); // 9
index(id, "6", "text", "c", "payload", ByteBuffer.wrap(new byte[] { 0x1a, 0x2b, 0x3c, 0x00, 0x00, 0x03 })); // 3
index(id, "7", "text", "c", "payload", ByteBuffer.wrap(new byte[] { 0x00, 0x3c, 0x73 })); // 1
index(id, "8", "text", "c", "payload", ByteBuffer.wrap(new byte[] { 0x59, 0x2d, 0x4d })); // 11
index(id, "9", "text", "a", "payload", ByteBuffer.wrap(new byte[] { 0x39, 0x79, 0x7a })); // 10
index(id, "10", "text", "b", "payload", ByteBuffer.wrap(new byte[] { 0x31, 0x39, 0x7c })); // 6
index(id, "11", "text", "d", "payload", ByteBuffer.wrap(new byte[] { (byte)0xff, (byte)0xaf, (byte)0x9c })); // 13
index(id, "12", "text", "d", "payload", ByteBuffer.wrap(new byte[] { 0x34, (byte)0xdd, 0x4d })); // 7
index(id, "13", "text", "d", "payload", ByteBuffer.wrap(new byte[] { (byte)0x80, 0x11, 0x33 })); // 12
index(id, "2", "text", "b", "test_sS", "22", "payload", ByteBuffer.wrap(new byte[] { 0x25, 0x21, 0x16 })); // 5
index(id, "3", "text", "a", "test_sS", "23", "payload", ByteBuffer.wrap(new byte[] { 0x35, 0x32, 0x58 })); // 8
index(id, "4", "text", "b", "test_sS", "24", "payload", ByteBuffer.wrap(new byte[] { 0x25, 0x21, 0x15 })); // 4
index(id, "5", "text", "a", "test_sS", "25", "payload", ByteBuffer.wrap(new byte[] { 0x35, 0x35, 0x10, 0x00 })); // 9
index(id, "6", "text", "c", "test_sS", "26", "payload", ByteBuffer.wrap(new byte[] { 0x1a, 0x2b, 0x3c, 0x00, 0x00, 0x03 })); // 3
index(id, "7", "text", "c", "test_sS", "27", "payload", ByteBuffer.wrap(new byte[] { 0x00, 0x3c, 0x73 })); // 1
index(id, "8", "text", "c", "test_sS", "28", "payload", ByteBuffer.wrap(new byte[] { 0x59, 0x2d, 0x4d })); // 11
index(id, "9", "text", "a", "test_sS", "29", "payload", ByteBuffer.wrap(new byte[] { 0x39, 0x79, 0x7a })); // 10
index(id, "10", "text", "b", "test_sS", "30", "payload", ByteBuffer.wrap(new byte[] { 0x31, 0x39, 0x7c })); // 6
index(id, "11", "text", "d", "test_sS", "31", "payload", ByteBuffer.wrap(new byte[] { (byte)0xff, (byte)0xaf, (byte)0x9c })); // 13
index(id, "12", "text", "d", "test_sS", "32", "payload", ByteBuffer.wrap(new byte[] { 0x34, (byte)0xdd, 0x4d })); // 7
index(id, "13", "text", "d", "test_sS", "33", "payload", ByteBuffer.wrap(new byte[] { (byte)0x80, 0x11, 0x33 })); // 12
commit();
handle.put("QTime", SKIPVAL);
QueryResponse rsp;
rsp = query("q", "*:*", "fl", "id,score", "sort", "payload asc", "rows", "20");
rsp = query("q", "*:*", "fl", "id,test_sS,score", "sort", "payload asc", "rows", "20");
assertFieldValues(rsp.getResults(), id, 7, 1, 6, 4, 2, 10, 12, 3, 5, 9, 8, 13, 11);
assertFieldValues(rsp.getResults(), "test_sS", "27", "21", "26", "24", "22", "30", "32", "23", "25", "29", "28", "33", "31");
rsp = query("q", "*:*", "fl", "id,score", "sort", "payload desc", "rows", "20");
assertFieldValues(rsp.getResults(), id, 11, 13, 8, 9, 5, 3, 12, 10, 2, 4, 6, 1, 7);
// works with just fl=id as well
rsp = query("q", "*:*", "fl", "id", "sort", "payload desc", "rows", "20");
assertFieldValues(rsp.getResults(), id, 11, 13, 8, 9, 5, 3, 12, 10, 2, 4, 6, 1, 7);
rsp = query("q", "*:*", "fl", "id,score", "sort", "payload asc", "rows", "20");
assertFieldValues(rsp.getResults(), id, 7, 1, 6, 4, 2, 10, 12, 3, 5, 9, 8, 13, 11);
rsp = query("q", "*:*", "fl", "id,test_sS,score", "sort", "payload asc", "rows", "20", "distrib.singlePass", "true");
assertFieldValues(rsp.getResults(), id, 7, 1, 6, 4, 2, 10, 12, 3, 5, 9, 8, 13, 11);
assertFieldValues(rsp.getResults(), "test_sS", "27", "21", "26", "24", "22", "30", "32", "23", "25", "29", "28", "33", "31");
QueryResponse nonDistribRsp = query("q", "*:*", "fl", "id,test_sS,score", "sort", "payload asc", "rows", "20");
compareResponses(rsp, nonDistribRsp); // make sure distrib and distrib.singlePass return the same thing
nonDistribRsp = query("q", "*:*", "fl", "score", "sort", "payload asc", "rows", "20");
rsp = query("q", "*:*", "fl", "score", "sort", "payload asc", "rows", "20", "distrib.singlePass", "true");
compareResponses(rsp, nonDistribRsp); // make sure distrib and distrib.singlePass return the same thing
// verify that the optimization actually works
verifySinglePass("q", "*:*", "fl", "id", "sort", "payload desc", "rows", "20"); // id only is optimized by default
verifySinglePass("q", "*:*", "fl", "id,score", "sort", "payload desc", "rows", "20"); // id,score only is optimized by default
verifySinglePass("q", "*:*", "fl", "score", "sort", "payload asc", "rows", "20", "distrib.singlePass", "true");
}
private void verifySinglePass(String... q) throws SolrServerException {
QueryResponse rsp;ModifiableSolrParams params = new ModifiableSolrParams();
for (int i = 0; i < q.length; i += 2) {
params.add(q[i].toString(), q[i + 1].toString());
}
params.add("shards", getShardsString());
params.add("debug", "track");
rsp = queryServer(new ModifiableSolrParams(params));
Map<String, Object> debugMap = rsp.getDebugMap();
SimpleOrderedMap<Object> track = (SimpleOrderedMap<Object>) debugMap.get("track");
assertNotNull(track);
assertNotNull(track.get("EXECUTE_QUERY"));
assertNull("A single pass request should not have a GET_FIELDS phase", track.get("GET_FIELDS"));
}
}

View File

@ -52,5 +52,6 @@ public interface ShardParams {
public static final String _ROUTE_ = "_route_";
/** Force a single-pass distributed query? (true/false) */
public static final String DISTRIB_SINGLE_PASS = "distrib.singlePass";
}

View File

@ -1753,11 +1753,11 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
SolrDocument doc = documents.get(docNum - 1);
Object expected = expectedValues[docNum - 1];
Object actual = doc.get(fieldName);
if (null != expected && null != actual) {
if ( ! expected.equals(actual)) {
fail( "Unexpected " + fieldName + " field value in document #" + docNum
+ ": expected=[" + expected + "], actual=[" + actual + "]");
}
if ((null == expected && null != actual) ||
(null != expected && null == actual) ||
(null != expected && null != actual && !expected.equals(actual))) {
fail("Unexpected " + fieldName + " field value in document #" + docNum
+ ": expected=[" + expected + "], actual=[" + actual + "]");
}
}
}