SOLR-14036: Remove explicit distrib=false from /terms handler (#1900)

* Remove distrib=false from /terms handler so that terms are returned from across all shards instead of a single local shard.
* cleanup shards parameter handling in TermsComponent. This is handled in HttpShardHandler
* Remove redundant tests for shard whitelist
* remove redundant terms params from ScoreNodeStream
This commit is contained in:
S N Munendra 2020-09-24 22:12:24 +05:30 committed by GitHub
parent 7be262ee5a
commit ac58472310
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 21 additions and 326 deletions

View File

@ -124,6 +124,8 @@ Other Changes
* SOLR-9607: Remove /terms configuration from solrconfig.xml, as implicit definition is up-to-par now (Alexandre Rafalovitch)
* SOLR-14036: Remove distrib=false from /terms handler's default parameters (David Smiley, Munendra S N)
Bug Fixes
---------------------
* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -41,17 +40,13 @@ import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory.WhitelistHostChecker;
import org.apache.solr.request.SimpleFacets.CountPair;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
@ -83,18 +78,10 @@ public class TermsComponent extends SearchComponent {
public static final int UNLIMITED_MAX_COUNT = -1;
public static final String COMPONENT_NAME = "terms";
// This needs to be created here too, because Solr doesn't call init(...) on default components. Bug?
private WhitelistHostChecker whitelistHostChecker = new WhitelistHostChecker(
null,
!HttpShardHandlerFactory.doGetDisableShardsWhitelist());
@Override
public void init( @SuppressWarnings({"rawtypes"})NamedList args )
{
super.init(args);
whitelistHostChecker = new WhitelistHostChecker(
(String) args.get(HttpShardHandlerFactory.INIT_SHARDS_WHITELIST),
!HttpShardHandlerFactory.doGetDisableShardsWhitelist());
}
@Override
@ -104,39 +91,6 @@ public class TermsComponent extends SearchComponent {
//the terms parameter is also used by json facet API. So we will get errors if we try to parse as boolean
if (params.get(TermsParams.TERMS, "false").equals("true")) {
rb.doTerms = true;
} else {
return;
}
// TODO: temporary... this should go in a different component.
String shards = params.get(ShardParams.SHARDS);
if (shards != null) {
rb.isDistrib = true;
if (params.get(ShardParams.SHARDS_QT) == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified");
}
List<String> lst = StrUtils.splitSmart(shards, ",", true);
checkShardsWhitelist(rb, lst);
rb.shards = lst.toArray(new String[lst.size()]);
}
}
protected void checkShardsWhitelist(final ResponseBuilder rb, final List<String> lst) {
final List<String> urls = new LinkedList<String>();
for (final String ele : lst) {
urls.addAll(StrUtils.splitSmart(ele, '|'));
}
if (whitelistHostChecker.isWhitelistHostCheckingEnabled() && rb.req.getCore().getCoreContainer().getZkController() == null && !whitelistHostChecker.hasExplicitWhitelist()) {
throw new SolrException(ErrorCode.FORBIDDEN, "TermsComponent "+HttpShardHandlerFactory.INIT_SHARDS_WHITELIST
+" not configured but required when using the '"+ShardParams.SHARDS+"' parameter with the TermsComponent."
+HttpShardHandlerFactory.SET_SOLR_DISABLE_SHARDS_WHITELIST_CLUE);
} else {
ClusterState cs = null;
if (rb.req.getCore().getCoreContainer().getZkController() != null) {
cs = rb.req.getCore().getCoreContainer().getZkController().getClusterState();
}
whitelistHostChecker.checkWhitelist(cs, urls.toString(), urls);
}
}

View File

@ -133,8 +133,7 @@
"terms"
],
"defaults": {
"terms": true,
"distrib": false
"terms": true
}
},
"/analysis/document": {

View File

@ -1,257 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import static org.hamcrest.CoreMatchers.containsString;
import java.io.IOException;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.apache.solr.cloud.ConfigRequest;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.util.NamedList;
import org.junit.Test;
public class CustomTermsComponentTest extends ShardsWhitelistTest {
public static class CustomTermsComponent extends TermsComponent {
public void init( @SuppressWarnings({"rawtypes"})NamedList args )
{
super.init(args);
}
@Override
protected void checkShardsWhitelist(final ResponseBuilder rb, final List<String> lst) {
// ignore shards whitelist
}
}
private static String addCustomHandlerWithTermsComponentConfig(final MiniSolrCloudCluster cluster, final String collection,
final String defaultHandlerName, final String shardsWhitelist) throws Exception {
return addCustomHandler(cluster, collection, defaultHandlerName, shardsWhitelist);
}
private static String addCustomHandlerWithCustomTermsComponent(final MiniSolrCloudCluster cluster, final String collection,
final String defaultHandlerName) throws Exception {
return addCustomHandler(cluster, collection, defaultHandlerName, null);
}
private static String addCustomHandler(final MiniSolrCloudCluster cluster, final String collection,
final String defaultHandlerName, final String shardsWhitelist) throws Exception {
// determine custom handler name (the exact name should not matter)
final String customHandlerName = defaultHandlerName+"_custom"+random().nextInt();
// determine custom terms component name (the exact name should not matter)
final String customTermsComponentName = TermsComponent.COMPONENT_NAME+"_custom"+random().nextInt();
// determine terms component class name and attributes
final String customTermsComponentClass;
final String customTermsComponentAttributesJSON;
if (shardsWhitelist != null) {
customTermsComponentClass = TermsComponent.class.getName();
customTermsComponentAttributesJSON =
" '"+HttpShardHandlerFactory.INIT_SHARDS_WHITELIST+"' : '"+shardsWhitelist+"',\n";
} else {
customTermsComponentClass = CustomTermsComponent.class.getName();
customTermsComponentAttributesJSON = "";
}
// add custom component
cluster.getSolrClient().request(
new ConfigRequest(
"{\n" +
" 'add-searchcomponent': {\n" +
customTermsComponentAttributesJSON +
" 'name': '"+customTermsComponentName+"',\n" +
" 'class': '"+customTermsComponentClass+"'\n" +
" }\n" +
"}"),
collection);
// add custom handler
cluster.getSolrClient().request(
new ConfigRequest(
"{\n" +
" 'add-requesthandler': {\n" +
" 'name' : '"+customHandlerName+"',\n" +
" 'class' : '"+SearchHandler.class.getName()+"',\n" +
" 'components' : [ '"+QueryComponent.COMPONENT_NAME+"', '"+customTermsComponentName+"' ]\n" +
" }\n" +
"}"),
collection);
return customHandlerName;
}
@Test
@Override
public void test() throws Exception {
for (final String clusterId : clusterId2cluster.keySet()) {
final MiniSolrCloudCluster cluster = clusterId2cluster.get(clusterId);
final String collection = COLLECTION_NAME;
doTest(cluster, collection);
}
}
private static void doTest(final MiniSolrCloudCluster cluster, final String collection) throws Exception {
// add some documents
final String id = "id";
final String f1 = "a_t";
final String f2 = "b_t";
final String v1 = "bee";
final String v2 = "buzz";
{
new UpdateRequest()
.add(sdoc(id, 1, f1, v1, f2, v2+" "+v2+" "+v2))
.add(sdoc(id, 2, f1, v1+" "+v1, f2, v2+" "+v2))
.add(sdoc(id, 3, f1, v1+" "+v1+" "+v1, f2, v2))
.commit(cluster.getSolrClient(), collection);
}
// search for the documents' terms ...
final String defaultHandlerName = "/select";
// search with the default handler ...
final String shards = findAndCheckTerms(cluster, collection,
defaultHandlerName,
null, // ... without specifying shards
(random().nextBoolean() ? null : f1), v1,
(random().nextBoolean() ? null : f2), v2,
null);
// search with the default handler ...
findAndCheckTerms(cluster, collection,
defaultHandlerName,
shards, // ... with specified shards, but all valid
(random().nextBoolean() ? null : f1), v1,
(random().nextBoolean() ? null : f2), v2,
null);
ignoreException("not on the shards whitelist");
// this case should fail
findAndCheckTerms(cluster, collection,
defaultHandlerName,
shards + ",http://" + DEAD_HOST_1, // ... with specified shards with one invalid
(random().nextBoolean() ? null : f1), v1,
(random().nextBoolean() ? null : f2), v2,
"No live SolrServers available to handle this request");
unIgnoreException("not on the shards whitelist");
// configure a custom handler ...
final String customHandlerName;
if (random().nextBoolean()) {
// ... with a shards whitelist
customHandlerName = addCustomHandlerWithTermsComponentConfig(cluster, collection, defaultHandlerName, shards);
} else {
// ... with a custom terms component that disregards shards whitelist logic
customHandlerName = addCustomHandlerWithCustomTermsComponent(cluster, collection, defaultHandlerName);
}
// search with the custom handler ...
findAndCheckTerms(cluster, collection,
customHandlerName,
shards, // ... with specified shards
(random().nextBoolean() ? null : f1), v1,
(random().nextBoolean() ? null : f2), v2,
null);
}
private static String findAndCheckTerms(final MiniSolrCloudCluster cluster, final String collection,
String requestHandlerName, String in_shards,
String field1, String value1,
String field2, String value2,
String solrServerExceptionMessagePrefix) throws IOException {
// compose the query ...
final SolrQuery solrQuery = new SolrQuery("*:*");
solrQuery.setRequestHandler(requestHandlerName);
solrQuery.add("shards.qt", requestHandlerName);
// ... asking for terms ...
solrQuery.setTerms(true);
if (field1 != null) {
solrQuery.addTermsField(field1);
}
if (field2 != null) {
solrQuery.addTermsField(field2);
}
// ... and shards info ...
solrQuery.add("shards.info", "true");
// ... passing shards to use (if we have a preference)
if (in_shards != null) {
solrQuery.add("shards", in_shards);
}
// make the query
final QueryResponse queryResponse;
try {
queryResponse = new QueryRequest(solrQuery)
.process(cluster.getSolrClient(), collection);
assertNull("expected exception ("+solrServerExceptionMessagePrefix+") not encountered", solrServerExceptionMessagePrefix);
} catch (SolrServerException sse) {
assertNotNull("unexpectedly caught exception "+sse, solrServerExceptionMessagePrefix);
assertTrue(sse.getMessage().startsWith(solrServerExceptionMessagePrefix));
assertThat(sse.getCause().getMessage(), containsString("not on the shards whitelist"));
return null;
}
// analyse the response ...
final TermsResponse termsResponse = queryResponse.getTermsResponse();
// ... checking the terms returned ...
checkTermsResponse(termsResponse, field1, value1);
checkTermsResponse(termsResponse, field2, value2);
// ... and assemble info about the shards ...
final String out_shards = extractShardAddresses(queryResponse, ",");
// ... to return to the caller
return out_shards;
}
@SuppressWarnings("unchecked")
private static String extractShardAddresses(final QueryResponse queryResponse, final String delimiter) {
final StringBuilder sb = new StringBuilder();
final NamedList<Object> nl = (NamedList<Object>)queryResponse.getResponse().get("shards.info");
assertNotNull(queryResponse.toString(), nl);
for (int ii = 0; ii < nl.size(); ++ii) {
final String shardAddress = (String)((NamedList<Object>)nl.getVal(ii)).get("shardAddress");
if (sb.length() > 0) {
sb.append(delimiter);
}
sb.append(shardAddress);
}
return sb.toString();
}
private static void checkTermsResponse(TermsResponse termsResponse, String field, String value) {
if (field != null) {
final List<Term> ttList = termsResponse.getTerms(field);
assertEquals(1, ttList.size());
assertEquals(value, ttList.get(0).getTerm());
}
}
}

View File

@ -51,11 +51,11 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
del("*:*");
index(id, random.nextInt(), "b_t", "snake a,b spider shark snail slug seal", "foo_i_p", "1");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i_p");
query("qt", "/terms", "terms.fl", "foo_i_p");
del("*:*");
// verify point field on empty index
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i_p");
query("qt", "/terms", "terms.fl", "foo_i_p");
index(id, random.nextInt(), "b_t", "snake a,b spider shark snail slug seal", "foo_i", "1");
index(id, random.nextInt(), "b_t", "snake spider shark snail slug", "foo_i", "2", "foo_date_p", "2015-01-03T14:30:00Z");
@ -70,25 +70,25 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
handle.clear();
handle.put("terms", UNORDERED);
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.lower", "s");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "sn", "terms.lower", "sn");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn");
query("qt", "/terms", "terms.fl", "b_t");
query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.lower", "s");
query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "sn", "terms.lower", "sn");
query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn");
// terms.sort
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.sort", "index");
query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
query("qt", "/terms", "terms.fl", "b_t", "terms.sort", "index");
// terms.list
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra,ant,bad");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i", "terms.list", "2,3,1");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2,3,1");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra", "terms.ttf", "true");
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake,ant,zebra", "terms.ttf", "true");
query("qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra,ant,bad");
query("qt", "/terms", "terms.fl", "foo_i", "terms.list", "2,3,1");
query("qt", "/terms", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2,3,1");
query("qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra", "terms.ttf", "true");
query("qt", "/terms", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake,ant,zebra", "terms.ttf", "true");
// for date point field
query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_date_p");
query("qt", "/terms", "terms.fl", "foo_date_p");
// terms.ttf=true doesn't work for point fields
//query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_date_p", "terms.ttf", "true");
//query("qt", "/terms", "terms.fl", "foo_date_p", "terms.ttf", "true");
}
protected QueryResponse query(Object... q) throws Exception {

View File

@ -128,6 +128,8 @@ _(raw; not yet edited)_
* SOLR-14510: The `writeStartDocumentList` in `TextResponseWriter` now receives an extra boolean parameter representing the "exactness" of the numFound value (exact vs approximation).
Any custom response writer extending `TextResponseWriter` will need to implement this abstract method now (instead previous with the same name but without the new boolean parameter).
* SOLR-14036: Implicit /terms handler now returns terms across all shards in SolrCloud instead of only the local core. Users/apps may be assuming the old behavior. A request can be modified via the standard distrib=false param to only use the local core receiving the request.
* SOLR-12847: maxShardsPerNode parameter has been removed because it was broken and inconsistent with other replica placement strategies. Other relevant placement strategies
should be used instead, such as autoscaling policy or rules-based placement.

View File

@ -35,7 +35,6 @@ The definition is equivalent to:
=== Using the Terms Component in a Request Handler
Solr comes with an <<implicit-requesthandlers.adoc#query-handlers,Implicit RequestHandler>> definition `/terms`, which enables (only) Terms component.
This handler, by default, can only be used on a single Solr core (`distrib=false`).
If you want to enable Terms component when using another Request Handler, `terms=true` parameter needs to be passed during the request or be set in the handler's defaults.

View File

@ -42,8 +42,6 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
/**
* Iterates over a gatherNodes() expression and scores the Tuples based on tf-idf.
*
@ -209,12 +207,10 @@ public class ScoreNodesStream extends TupleStream implements Expressible
CloudSolrClient client = clientCache.getCloudSolrClient(zkHost);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CommonParams.QT, "/terms");
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, field);
params.add(TermsParams.TERMS_STATS, "true");
params.add(TermsParams.TERMS_LIST, builder.toString());
params.add(TermsParams.TERMS_LIMIT, Integer.toString(nodes.size()));
params.add(DISTRIB, "true");
QueryRequest request = new QueryRequest(params);