From ac5847231017f12d6a51348e1cdbd50c9732a224 Mon Sep 17 00:00:00 2001 From: S N Munendra Date: Thu, 24 Sep 2020 22:12:24 +0530 Subject: [PATCH] SOLR-14036: Remove explicit distrib=false from /terms handler (#1900) * Remove distrib=false from /terms handler so that terms are returned from across all shards instead of a single local shard. * cleanup shards parameter handling in TermsComponent. This is handled in HttpShardHandler * Remove redundant tests for shard whitelist * remove redundant terms params from ScoreNodeStream --- solr/CHANGES.txt | 2 + .../handler/component/TermsComponent.java | 46 ---- solr/core/src/resources/ImplicitPlugins.json | 3 +- .../component/CustomTermsComponentTest.java | 257 ------------------ .../DistributedTermsComponentTest.java | 32 +-- .../src/major-changes-in-solr-9.adoc | 2 + .../src/the-terms-component.adoc | 1 - .../solrj/io/stream/ScoreNodesStream.java | 4 - 8 files changed, 21 insertions(+), 326 deletions(-) delete mode 100644 solr/core/src/test/org/apache/solr/handler/component/CustomTermsComponentTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a909850ffbb..3931744a082 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -124,6 +124,8 @@ Other Changes * SOLR-9607: Remove /terms configuration from solrconfig.xml, as implicit definition is up-to-par now (Alexandre Rafalovitch) +* SOLR-14036: Remove distrib=false from /terms handler's default parameters (David Smiley, Munendra S N) + Bug Fixes --------------------- * SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java index 4eddadc108a..6ab2dd20d2e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; -import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -41,17 +40,13 @@ import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.mutable.MutableValue; import org.apache.solr.client.solrj.response.TermsResponse; import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.TermsParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.Utils; -import org.apache.solr.handler.component.HttpShardHandlerFactory.WhitelistHostChecker; import org.apache.solr.request.SimpleFacets.CountPair; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; @@ -83,18 +78,10 @@ public class TermsComponent extends SearchComponent { public static final int UNLIMITED_MAX_COUNT = -1; public static final String COMPONENT_NAME = "terms"; - // This needs to be created here too, because Solr doesn't call init(...) on default components. Bug? - private WhitelistHostChecker whitelistHostChecker = new WhitelistHostChecker( - null, - !HttpShardHandlerFactory.doGetDisableShardsWhitelist()); - @Override public void init( @SuppressWarnings({"rawtypes"})NamedList args ) { super.init(args); - whitelistHostChecker = new WhitelistHostChecker( - (String) args.get(HttpShardHandlerFactory.INIT_SHARDS_WHITELIST), - !HttpShardHandlerFactory.doGetDisableShardsWhitelist()); } @Override @@ -104,39 +91,6 @@ public class TermsComponent extends SearchComponent { //the terms parameter is also used by json facet API. So we will get errors if we try to parse as boolean if (params.get(TermsParams.TERMS, "false").equals("true")) { rb.doTerms = true; - } else { - return; - } - - // TODO: temporary... this should go in a different component. - String shards = params.get(ShardParams.SHARDS); - if (shards != null) { - rb.isDistrib = true; - if (params.get(ShardParams.SHARDS_QT) == null) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified"); - } - List lst = StrUtils.splitSmart(shards, ",", true); - checkShardsWhitelist(rb, lst); - rb.shards = lst.toArray(new String[lst.size()]); - } - } - - protected void checkShardsWhitelist(final ResponseBuilder rb, final List lst) { - final List urls = new LinkedList(); - for (final String ele : lst) { - urls.addAll(StrUtils.splitSmart(ele, '|')); - } - - if (whitelistHostChecker.isWhitelistHostCheckingEnabled() && rb.req.getCore().getCoreContainer().getZkController() == null && !whitelistHostChecker.hasExplicitWhitelist()) { - throw new SolrException(ErrorCode.FORBIDDEN, "TermsComponent "+HttpShardHandlerFactory.INIT_SHARDS_WHITELIST - +" not configured but required when using the '"+ShardParams.SHARDS+"' parameter with the TermsComponent." - +HttpShardHandlerFactory.SET_SOLR_DISABLE_SHARDS_WHITELIST_CLUE); - } else { - ClusterState cs = null; - if (rb.req.getCore().getCoreContainer().getZkController() != null) { - cs = rb.req.getCore().getCoreContainer().getZkController().getClusterState(); - } - whitelistHostChecker.checkWhitelist(cs, urls.toString(), urls); } } diff --git a/solr/core/src/resources/ImplicitPlugins.json b/solr/core/src/resources/ImplicitPlugins.json index 3e9d439f925..bb17c3b07d1 100644 --- a/solr/core/src/resources/ImplicitPlugins.json +++ b/solr/core/src/resources/ImplicitPlugins.json @@ -133,8 +133,7 @@ "terms" ], "defaults": { - "terms": true, - "distrib": false + "terms": true } }, "/analysis/document": { diff --git a/solr/core/src/test/org/apache/solr/handler/component/CustomTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/CustomTermsComponentTest.java deleted file mode 100644 index 213e22480d5..00000000000 --- a/solr/core/src/test/org/apache/solr/handler/component/CustomTermsComponentTest.java +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.component; - -import static org.hamcrest.CoreMatchers.containsString; - -import java.io.IOException; -import java.util.List; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.client.solrj.request.UpdateRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.client.solrj.response.TermsResponse; -import org.apache.solr.client.solrj.response.TermsResponse.Term; -import org.apache.solr.cloud.ConfigRequest; -import org.apache.solr.cloud.MiniSolrCloudCluster; -import org.apache.solr.common.util.NamedList; -import org.junit.Test; - -public class CustomTermsComponentTest extends ShardsWhitelistTest { - - public static class CustomTermsComponent extends TermsComponent { - - public void init( @SuppressWarnings({"rawtypes"})NamedList args ) - { - super.init(args); - } - - @Override - protected void checkShardsWhitelist(final ResponseBuilder rb, final List lst) { - // ignore shards whitelist - } - - } - - private static String addCustomHandlerWithTermsComponentConfig(final MiniSolrCloudCluster cluster, final String collection, - final String defaultHandlerName, final String shardsWhitelist) throws Exception { - return addCustomHandler(cluster, collection, defaultHandlerName, shardsWhitelist); - } - - private static String addCustomHandlerWithCustomTermsComponent(final MiniSolrCloudCluster cluster, final String collection, - final String defaultHandlerName) throws Exception { - return addCustomHandler(cluster, collection, defaultHandlerName, null); - } - - private static String addCustomHandler(final MiniSolrCloudCluster cluster, final String collection, - final String defaultHandlerName, final String shardsWhitelist) throws Exception { - - // determine custom handler name (the exact name should not matter) - final String customHandlerName = defaultHandlerName+"_custom"+random().nextInt(); - - // determine custom terms component name (the exact name should not matter) - final String customTermsComponentName = TermsComponent.COMPONENT_NAME+"_custom"+random().nextInt(); - - // determine terms component class name and attributes - final String customTermsComponentClass; - final String customTermsComponentAttributesJSON; - if (shardsWhitelist != null) { - customTermsComponentClass = TermsComponent.class.getName(); - customTermsComponentAttributesJSON = - " '"+HttpShardHandlerFactory.INIT_SHARDS_WHITELIST+"' : '"+shardsWhitelist+"',\n"; - } else { - customTermsComponentClass = CustomTermsComponent.class.getName(); - customTermsComponentAttributesJSON = ""; - } - - // add custom component - cluster.getSolrClient().request( - new ConfigRequest( - "{\n" + - " 'add-searchcomponent': {\n" + - customTermsComponentAttributesJSON + - " 'name': '"+customTermsComponentName+"',\n" + - " 'class': '"+customTermsComponentClass+"'\n" + - " }\n" + - "}"), - collection); - - // add custom handler - cluster.getSolrClient().request( - new ConfigRequest( - "{\n" + - " 'add-requesthandler': {\n" + - " 'name' : '"+customHandlerName+"',\n" + - " 'class' : '"+SearchHandler.class.getName()+"',\n" + - " 'components' : [ '"+QueryComponent.COMPONENT_NAME+"', '"+customTermsComponentName+"' ]\n" + - " }\n" + - "}"), - collection); - - return customHandlerName; - } - - @Test - @Override - public void test() throws Exception { - for (final String clusterId : clusterId2cluster.keySet()) { - final MiniSolrCloudCluster cluster = clusterId2cluster.get(clusterId); - final String collection = COLLECTION_NAME; - doTest(cluster, collection); - } - } - - private static void doTest(final MiniSolrCloudCluster cluster, final String collection) throws Exception { - - // add some documents - final String id = "id"; - final String f1 = "a_t"; - final String f2 = "b_t"; - final String v1 = "bee"; - final String v2 = "buzz"; - { - new UpdateRequest() - .add(sdoc(id, 1, f1, v1, f2, v2+" "+v2+" "+v2)) - .add(sdoc(id, 2, f1, v1+" "+v1, f2, v2+" "+v2)) - .add(sdoc(id, 3, f1, v1+" "+v1+" "+v1, f2, v2)) - .commit(cluster.getSolrClient(), collection); - } - - // search for the documents' terms ... - final String defaultHandlerName = "/select"; - - // search with the default handler ... - final String shards = findAndCheckTerms(cluster, collection, - defaultHandlerName, - null, // ... without specifying shards - (random().nextBoolean() ? null : f1), v1, - (random().nextBoolean() ? null : f2), v2, - null); - - // search with the default handler ... - findAndCheckTerms(cluster, collection, - defaultHandlerName, - shards, // ... with specified shards, but all valid - (random().nextBoolean() ? null : f1), v1, - (random().nextBoolean() ? null : f2), v2, - null); - - ignoreException("not on the shards whitelist"); - // this case should fail - findAndCheckTerms(cluster, collection, - defaultHandlerName, - shards + ",http://" + DEAD_HOST_1, // ... with specified shards with one invalid - (random().nextBoolean() ? null : f1), v1, - (random().nextBoolean() ? null : f2), v2, - "No live SolrServers available to handle this request"); - unIgnoreException("not on the shards whitelist"); - - // configure a custom handler ... - final String customHandlerName; - if (random().nextBoolean()) { - // ... with a shards whitelist - customHandlerName = addCustomHandlerWithTermsComponentConfig(cluster, collection, defaultHandlerName, shards); - } else { - // ... with a custom terms component that disregards shards whitelist logic - customHandlerName = addCustomHandlerWithCustomTermsComponent(cluster, collection, defaultHandlerName); - } - - // search with the custom handler ... - findAndCheckTerms(cluster, collection, - customHandlerName, - shards, // ... with specified shards - (random().nextBoolean() ? null : f1), v1, - (random().nextBoolean() ? null : f2), v2, - null); - - } - - private static String findAndCheckTerms(final MiniSolrCloudCluster cluster, final String collection, - String requestHandlerName, String in_shards, - String field1, String value1, - String field2, String value2, - String solrServerExceptionMessagePrefix) throws IOException { - - // compose the query ... - final SolrQuery solrQuery = new SolrQuery("*:*"); - solrQuery.setRequestHandler(requestHandlerName); - solrQuery.add("shards.qt", requestHandlerName); - // ... asking for terms ... - solrQuery.setTerms(true); - if (field1 != null) { - solrQuery.addTermsField(field1); - } - if (field2 != null) { - solrQuery.addTermsField(field2); - } - // ... and shards info ... - solrQuery.add("shards.info", "true"); - // ... passing shards to use (if we have a preference) - if (in_shards != null) { - solrQuery.add("shards", in_shards); - } - - // make the query - final QueryResponse queryResponse; - try { - queryResponse = new QueryRequest(solrQuery) - .process(cluster.getSolrClient(), collection); - assertNull("expected exception ("+solrServerExceptionMessagePrefix+") not encountered", solrServerExceptionMessagePrefix); - } catch (SolrServerException sse) { - assertNotNull("unexpectedly caught exception "+sse, solrServerExceptionMessagePrefix); - assertTrue(sse.getMessage().startsWith(solrServerExceptionMessagePrefix)); - assertThat(sse.getCause().getMessage(), containsString("not on the shards whitelist")); - return null; - } - - // analyse the response ... - final TermsResponse termsResponse = queryResponse.getTermsResponse(); - // ... checking the terms returned ... - checkTermsResponse(termsResponse, field1, value1); - checkTermsResponse(termsResponse, field2, value2); - // ... and assemble info about the shards ... - final String out_shards = extractShardAddresses(queryResponse, ","); - // ... to return to the caller - return out_shards; - } - - - @SuppressWarnings("unchecked") - private static String extractShardAddresses(final QueryResponse queryResponse, final String delimiter) { - final StringBuilder sb = new StringBuilder(); - final NamedList nl = (NamedList)queryResponse.getResponse().get("shards.info"); - assertNotNull(queryResponse.toString(), nl); - for (int ii = 0; ii < nl.size(); ++ii) { - final String shardAddress = (String)((NamedList)nl.getVal(ii)).get("shardAddress"); - if (sb.length() > 0) { - sb.append(delimiter); - } - sb.append(shardAddress); - } - return sb.toString(); - } - - private static void checkTermsResponse(TermsResponse termsResponse, String field, String value) { - if (field != null) { - final List ttList = termsResponse.getTerms(field); - assertEquals(1, ttList.size()); - assertEquals(value, ttList.get(0).getTerm()); - } - } - -} diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java index 9f0a26c0104..b83320c08c5 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java @@ -51,11 +51,11 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase del("*:*"); index(id, random.nextInt(), "b_t", "snake a,b spider shark snail slug seal", "foo_i_p", "1"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i_p"); + query("qt", "/terms", "terms.fl", "foo_i_p"); del("*:*"); // verify point field on empty index - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i_p"); + query("qt", "/terms", "terms.fl", "foo_i_p"); index(id, random.nextInt(), "b_t", "snake a,b spider shark snail slug seal", "foo_i", "1"); index(id, random.nextInt(), "b_t", "snake spider shark snail slug", "foo_i", "2", "foo_date_p", "2015-01-03T14:30:00Z"); @@ -70,25 +70,25 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase handle.clear(); handle.put("terms", UNORDERED); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t"); - query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.lower", "s"); - query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "sn", "terms.lower", "sn"); - query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn"); + query("qt", "/terms", "terms.fl", "b_t"); + query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.lower", "s"); + query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "sn", "terms.lower", "sn"); + query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn"); // terms.sort - query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index"); - query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.sort", "index"); + query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index"); + query("qt", "/terms", "terms.limit", 5, "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index"); + query("qt", "/terms", "terms.fl", "b_t", "terms.sort", "index"); // terms.list - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra,ant,bad"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i", "terms.list", "2,3,1"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2,3,1"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra", "terms.ttf", "true"); - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake,ant,zebra", "terms.ttf", "true"); + query("qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra,ant,bad"); + query("qt", "/terms", "terms.fl", "foo_i", "terms.list", "2,3,1"); + query("qt", "/terms", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2,3,1"); + query("qt", "/terms", "terms.fl", "b_t", "terms.list", "snake,zebra", "terms.ttf", "true"); + query("qt", "/terms", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake,ant,zebra", "terms.ttf", "true"); // for date point field - query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_date_p"); + query("qt", "/terms", "terms.fl", "foo_date_p"); // terms.ttf=true doesn't work for point fields - //query("qt", "/terms", "shards.qt", "/terms", "terms.fl", "foo_date_p", "terms.ttf", "true"); + //query("qt", "/terms", "terms.fl", "foo_date_p", "terms.ttf", "true"); } protected QueryResponse query(Object... q) throws Exception { diff --git a/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc index acebbcea848..f59f5e59a65 100644 --- a/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/src/major-changes-in-solr-9.adoc @@ -128,6 +128,8 @@ _(raw; not yet edited)_ * SOLR-14510: The `writeStartDocumentList` in `TextResponseWriter` now receives an extra boolean parameter representing the "exactness" of the numFound value (exact vs approximation). Any custom response writer extending `TextResponseWriter` will need to implement this abstract method now (instead previous with the same name but without the new boolean parameter). +* SOLR-14036: Implicit /terms handler now returns terms across all shards in SolrCloud instead of only the local core. Users/apps may be assuming the old behavior. A request can be modified via the standard distrib=false param to only use the local core receiving the request. + * SOLR-12847: maxShardsPerNode parameter has been removed because it was broken and inconsistent with other replica placement strategies. Other relevant placement strategies should be used instead, such as autoscaling policy or rules-based placement. diff --git a/solr/solr-ref-guide/src/the-terms-component.adoc b/solr/solr-ref-guide/src/the-terms-component.adoc index b83e03f8601..e53f4697620 100644 --- a/solr/solr-ref-guide/src/the-terms-component.adoc +++ b/solr/solr-ref-guide/src/the-terms-component.adoc @@ -35,7 +35,6 @@ The definition is equivalent to: === Using the Terms Component in a Request Handler Solr comes with an <> definition `/terms`, which enables (only) Terms component. -This handler, by default, can only be used on a single Solr core (`distrib=false`). If you want to enable Terms component when using another Request Handler, `terms=true` parameter needs to be passed during the request or be set in the handler's defaults. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java index e4f9ebbe21d..bd04263b5f1 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java @@ -42,8 +42,6 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.TermsParams; import org.apache.solr.common.util.NamedList; -import static org.apache.solr.common.params.CommonParams.DISTRIB; - /** * Iterates over a gatherNodes() expression and scores the Tuples based on tf-idf. * @@ -209,12 +207,10 @@ public class ScoreNodesStream extends TupleStream implements Expressible CloudSolrClient client = clientCache.getCloudSolrClient(zkHost); ModifiableSolrParams params = new ModifiableSolrParams(); params.add(CommonParams.QT, "/terms"); - params.add(TermsParams.TERMS, "true"); params.add(TermsParams.TERMS_FIELD, field); params.add(TermsParams.TERMS_STATS, "true"); params.add(TermsParams.TERMS_LIST, builder.toString()); params.add(TermsParams.TERMS_LIMIT, Integer.toString(nodes.size())); - params.add(DISTRIB, "true"); QueryRequest request = new QueryRequest(params);