SOLR-13337: faster /terms response in distributed mode.

This commit is contained in:
Mikhail Khludnev 2019-04-22 00:16:04 +03:00
parent 377ac573dd
commit a95e68e13b
4 changed files with 85 additions and 18 deletions

View File

@ -237,6 +237,9 @@ Improvements
* SOLR-11035: (at least) 2 distinct failures possible when clients attempt searches during SolrCore reload,
added test band-aid for DocValuesNotIndexedTest.
* SOLR-13337: Only request the minimum required number of terms from each shard when using terms.sort=index and no
terms are discarded due to terms.mincount/terms.maxcount (Morten Bøgeskov, Munendra S N via Mikhail Khludnev)
Other Changes
----------------------

View File

@ -445,22 +445,31 @@ public class TermsComponent extends SearchComponent {
rb._termsHelper = null;
}
private static ShardRequest createShardQuery(SolrParams params) {
/**
 * Builds the {@link ShardRequest} (purpose {@code PURPOSE_GET_TERMS}) that is sent to the shards,
 * seeding its parameters from the original request parameters.
 *
 * <p>If the effective {@code TERMS_SORT} is {@code TERMS_SORT_INDEX} (count sort is assumed when the
 * parameter is absent), {@code TERMS_MINCOUNT} is at most 1 and {@code TERMS_MAXCOUNT} is at most 0,
 * the parameters are forwarded as they are. In every other case {@code TERMS_MINCOUNT} and
 * {@code TERMS_MAXCOUNT} are removed, {@code TERMS_LIMIT} is set to -1 and {@code TERMS_SORT} is
 * forced to {@code TERMS_SORT_INDEX}.
 *
 * <p>Package-private (rather than private) so that it can be called from TermsComponentTest.
 *
 * @param params the original request parameters; they are used to populate a new
 *               {@link ModifiableSolrParams} for the shard request
 * @return the shard request carrying the (possibly adjusted) parameters
 */
static ShardRequest createShardQuery(SolrParams params) {
ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
// base shard request on original parameters
sreq.params = new ModifiableSolrParams(params);
// remove any limits for shards, we want them to return all possible
// responses
// we want this so we can calculate the correct counts
// dont sort by count to avoid that unnecessary overhead on the shards
sreq.params.remove(TermsParams.TERMS_MAXCOUNT);
sreq.params.remove(TermsParams.TERMS_MINCOUNT);
sreq.params.set(TermsParams.TERMS_LIMIT, -1);
sreq.params.set(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
// if using index-order, we can send all parameters to all shards
// since all required data are returned within the first n rows
// (an absent sort parameter is treated as count sort here, which never takes the fast path)
String actualSort = sreq.params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
// fast path only when no count filter is requested: mincount defaults to 0 and must be <= 1,
// maxcount defaults to -1 and must be <= 0
boolean fast = actualSort.equals(TermsParams.TERMS_SORT_INDEX) &&
sreq.params.getLong(TermsParams.TERMS_MINCOUNT, 0) <= 1 &&
sreq.params.getLong(TermsParams.TERMS_MAXCOUNT, -1) <=0;
if (!fast) {
// remove any limits for shards, we want them to return all possible
// responses
// we want this so we can calculate the correct counts
// dont sort by count to avoid that unnecessary overhead on the shards
sreq.params.remove(TermsParams.TERMS_MAXCOUNT);
sreq.params.remove(TermsParams.TERMS_MINCOUNT);
sreq.params.set(TermsParams.TERMS_LIMIT, -1);
sreq.params.set(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
}
return sreq;
}

View File

@ -16,7 +16,14 @@
*/
package org.apache.solr.handler.component;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.stream.Stream;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.junit.Test;
/**
@ -30,15 +37,16 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
@Test
public void test() throws Exception {
Random random = random();
del("*:*");
index(id, 18, "b_t", "snake a,b spider shark snail slug seal", "foo_i", "1");
index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
index(id, 22, "b_t", "snake spider", "c_t", "snake spider");
index(id, 23, "b_t", "snake", "c_t", "snake");
index(id, 24, "b_t", "ant zebra", "c_t", "ant zebra");
index(id, 25, "b_t", "zebra", "c_t", "zebra");
index(id, random.nextInt(), "b_t", "snake a,b spider shark snail slug seal", "foo_i", "1");
index(id, random.nextInt(), "b_t", "snake spider shark snail slug", "foo_i", "2");
index(id, random.nextInt(), "b_t", "snake spider shark snail", "foo_i", "3");
index(id, random.nextInt(), "b_t", "snake spider shark", "foo_i", "2");
index(id, random.nextInt(), "b_t", "snake spider", "c_t", "snake spider");
index(id, random.nextInt(), "b_t", "snake", "c_t", "snake");
index(id, random.nextInt(), "b_t", "ant zebra", "c_t", "ant zebra");
index(id, random.nextInt(), "b_t", "zebra", "c_t", "zebra");
commit();
handle.clear();
@ -48,13 +56,37 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "sn", "terms.lower", "sn");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn");
// terms.sort
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index");
// terms.list
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake,zebra,ant,bad");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2,3,1");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2,3,1");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake,zebra", "terms.ttf", "true");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake,ant,zebra", "terms.ttf", "true");
}
/**
 * Delegates to the inherited {@code query}, but first may append randomly chosen
 * {@code terms.mincount} / {@code terms.maxcount} values (each in the range -1..2) to the request.
 *
 * <p>Requests containing {@code terms.list} are passed through untouched. Otherwise the arguments
 * are scanned at every second position: as soon as a {@code terms.sort}/{@code index} pair is seen,
 * or {@code rarely()} fires, the extra parameters are (usually) appended and scanning stops.
 *
 * @param q alternating parameter names and values of the request
 * @return the response of the delegated query
 */
protected QueryResponse query(Object... q) throws Exception {
  // SOLR-9243 doesn't support max/min count, so terms.list requests are never decorated
  final boolean usesTermsList = Stream.of(q).anyMatch(arg -> arg.equals("terms.list"));
  if (usesTermsList) {
    return super.query(q);
  }
  for (int idx = 0; idx < q.length; idx += 2) {
    final boolean indexSorted = q[idx].equals("terms.sort") && q[idx + 1].equals("index");
    // rarely() is only consulted when this pair is not an index-sort request
    if (!indexSorted && !rarely()) {
      continue;
    }
    final List<Object> extended = new ArrayList<>(Arrays.asList(q));
    if (usually()) {
      extended.add("terms.mincount");
      extended.add(random().nextInt(4) - 1);
    }
    if (usually()) {
      extended.add("terms.maxcount");
      extended.add(random().nextInt(4) - 1);
    }
    q = extended.toArray();
    // decorate at most once per request
    break;
  }
  return super.query(q);
}
}

View File

@ -511,4 +511,27 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
assertU(commit());
}
}
@Test
public void testTermsSortIndexDistribution() {
  final ModifiableSolrParams request = new ModifiableSolrParams();
  request.set(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
  request.set(TermsParams.TERMS_LIMIT, "any-number");
  // index sort without any count filter: shard parameters equal the original ones
  assertEquals(request.toString(), createShardQueryParamsString(request));

  // a mincount of 0 or 1 still leaves the parameters untouched
  for (String passThroughMinCount : new String[] {"0", "1"}) {
    request.set(TermsParams.TERMS_MINCOUNT, passThroughMinCount);
    assertEquals(request.toString(), createShardQueryParamsString(request));
  }

  // include all (also lower mincount) since 2 shards can have one each
  request.set(TermsParams.TERMS_MINCOUNT, "2");
  final String withMinCountTwo = request.toString();
  assertNotEquals(withMinCountTwo, createShardQueryParamsString(request));

  // "unlimited" since 2 shards can have 30 each, and term then should not be included
  request.remove(TermsParams.TERMS_MINCOUNT);
  request.set(TermsParams.TERMS_MAXCOUNT, "32");
  final String withMaxCount = request.toString();
  assertNotEquals(withMaxCount, createShardQueryParamsString(request));
}
/**
 * Runs {@code TermsComponent.createShardQuery} on the given parameters and returns the string
 * form of the parameters of the resulting shard request.
 */
private static String createShardQueryParamsString(ModifiableSolrParams params) {
  final ModifiableSolrParams shardParams = TermsComponent.createShardQuery(params).params;
  return shardParams.toString();
}
}