From 54e6528304027ab15bbac2b62746b34eed7b4f40 Mon Sep 17 00:00:00 2001 From: Dan Fox Date: Sun, 14 Jun 2020 17:47:55 -0400 Subject: [PATCH] SOLR-13749: XCJF -> Cross-Collection Join change solrUrlWhitelist -> allowSolrUrls ref guide: update join parser section to be inline with the unit tests. --- solr/CHANGES.txt | 3 + .../apache/solr/search/JoinQParserPlugin.java | 49 +++-- .../org/apache/solr/search/QParserPlugin.java | 2 - ...r.java => CrossCollectionJoinQParser.java} | 26 +-- ...ery.java => CrossCollectionJoinQuery.java} | 17 +- .../search/join/ScoreJoinQParserPlugin.java | 5 +- .../solr/search/join/XCJFQParserPlugin.java | 66 ------- .../{xcjf => ccjoin}/conf/schema.xml | 0 .../{xcjf => ccjoin}/conf/solrconfig.xml | 22 +-- .../apache/solr/search/QueryEqualityTest.java | 6 - ...java => CrossCollectionJoinQueryTest.java} | 88 ++++----- solr/solr-ref-guide/src/other-parsers.adoc | 184 +++++++++--------- 12 files changed, 207 insertions(+), 261 deletions(-) rename solr/core/src/java/org/apache/solr/search/join/{XCJFQParser.java => CrossCollectionJoinQParser.java} (75%) rename solr/core/src/java/org/apache/solr/search/join/{XCJFQuery.java => CrossCollectionJoinQuery.java} (94%) delete mode 100644 solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java rename solr/core/src/test-files/solr/configsets/{xcjf => ccjoin}/conf/schema.xml (100%) rename solr/core/src/test-files/solr/configsets/{xcjf => ccjoin}/conf/solrconfig.xml (70%) rename solr/core/src/test/org/apache/solr/search/join/{XCJFQueryTest.java => CrossCollectionJoinQueryTest.java} (68%) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4ffb2f3ed3e..9a6b0e8545c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -111,6 +111,9 @@ New Features * SOLR-14404: CoreContainer level custom requesthandlers (noble) +* SOLR-13749: Join queries can now work cross-collection, even when sharded or when spanning nodes. 
+ (Dan Fox, Kevin Watters, via Gus Heck, David Smiley) + Improvements --------------------- * SOLR-14316: Remove unchecked type conversion warning in JavaBinCodec's readMapEntry's equals() method diff --git a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java index 736b8f0b7e5..e1a689e869c 100644 --- a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java @@ -16,27 +16,32 @@ */ package org.apache.solr.search; -import java.lang.invoke.MethodHandles; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.join.CrossCollectionJoinQParser; import org.apache.solr.search.join.ScoreJoinQParserPlugin; import org.apache.solr.util.RefCounted; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class JoinQParserPlugin extends QParserPlugin { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NAME = "join"; /** Choose the internal algorithm */ private static final String METHOD = "method"; + private String routerField; + + private Set allowSolrUrls; + private static class JoinParams { final String fromField; final String fromCore; @@ -56,7 +61,7 @@ public class JoinQParserPlugin extends QParserPlugin { private enum Method { index { @Override - Query makeFilter(QParser qparser) throws SyntaxError { + Query makeFilter(QParser qparser, JoinQParserPlugin plugin) throws SyntaxError { final JoinParams jParams = parseJoin(qparser); 
final JoinQuery q = new JoinQuery(jParams.fromField, jParams.toField, jParams.fromCore, jParams.fromQuery); q.fromCoreOpenTime = jParams.fromCoreOpenTime; @@ -65,21 +70,28 @@ public class JoinQParserPlugin extends QParserPlugin { }, dvWithScore { @Override - Query makeFilter(QParser qparser) throws SyntaxError { + Query makeFilter(QParser qparser, JoinQParserPlugin plugin) throws SyntaxError { return new ScoreJoinQParserPlugin().createParser(qparser.qstr, qparser.localParams, qparser.params, qparser.req).parse(); } }, topLevelDV { @Override - Query makeFilter(QParser qparser) throws SyntaxError { + Query makeFilter(QParser qparser, JoinQParserPlugin plugin) throws SyntaxError { final JoinParams jParams = parseJoin(qparser); final JoinQuery q = new TopLevelJoinQuery(jParams.fromField, jParams.toField, jParams.fromCore, jParams.fromQuery); q.fromCoreOpenTime = jParams.fromCoreOpenTime; return q; } + }, + crossCollection { + @Override + Query makeFilter(QParser qparser, JoinQParserPlugin plugin) throws SyntaxError { + return new CrossCollectionJoinQParser(qparser.qstr, qparser.localParams, qparser.params, qparser.req, + plugin.routerField, plugin.allowSolrUrls).parse(); + } }; - abstract Query makeFilter(QParser qparser) throws SyntaxError; + abstract Query makeFilter(QParser qparser, JoinQParserPlugin plugin) throws SyntaxError; JoinParams parseJoin(QParser qparser) throws SyntaxError { final String fromField = qparser.getParam("from"); @@ -127,8 +139,23 @@ public class JoinQParserPlugin extends QParserPlugin { } } + @Override + @SuppressWarnings({"unchecked"}) + public void init(@SuppressWarnings({"rawtypes"})NamedList args) { + routerField = (String) args.get("routerField"); + + if (args.get("allowSolrUrls") != null) { + allowSolrUrls = new HashSet<>(); + allowSolrUrls.addAll((List) args.get("allowSolrUrls")); + } else { + allowSolrUrls = null; + } + } + @Override public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest 
req) { + final JoinQParserPlugin plugin = this; + return new QParser(qstr, localParams, params, req) { @Override @@ -136,14 +163,14 @@ public class JoinQParserPlugin extends QParserPlugin { if (localParams != null && localParams.get(METHOD) != null) { // TODO Make sure 'method' is valid value here and give users a nice error final Method explicitMethod = Method.valueOf(localParams.get(METHOD)); - return explicitMethod.makeFilter(this); + return explicitMethod.makeFilter(this, plugin); } // Legacy join behavior before introduction of SOLR-13892 if(localParams!=null && localParams.get(ScoreJoinQParserPlugin.SCORE)!=null) { return new ScoreJoinQParserPlugin().createParser(qstr, localParams, params, req).parse(); } else { - return Method.index.makeFilter(this); + return Method.index.makeFilter(this, plugin); } } }; @@ -160,4 +187,4 @@ public class JoinQParserPlugin extends QParserPlugin { return new JoinQuery(fromField, toField, null, subQuery); } -} +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index 43076822dc3..059994d3360 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -30,7 +30,6 @@ import org.apache.solr.search.join.BlockJoinParentQParserPlugin; import org.apache.solr.search.join.FiltersQParserPlugin; import org.apache.solr.search.join.GraphQParserPlugin; import org.apache.solr.search.join.HashRangeQParserPlugin; -import org.apache.solr.search.join.XCJFQParserPlugin; import org.apache.solr.search.mlt.MLTQParserPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin; @@ -87,7 +86,6 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI map.put(PayloadCheckQParserPlugin.NAME, new PayloadCheckQParserPlugin()); map.put(BoolQParserPlugin.NAME, new BoolQParserPlugin()); map.put(MinHashQParserPlugin.NAME, new 
MinHashQParserPlugin()); - map.put(XCJFQParserPlugin.NAME, new XCJFQParserPlugin()); map.put(HashRangeQParserPlugin.NAME, new HashRangeQParserPlugin()); standardPlugins = Collections.unmodifiableMap(map); diff --git a/solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java b/solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQParser.java similarity index 75% rename from solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java rename to solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQParser.java index 9be3c3d651b..82d99256e2b 100644 --- a/solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQParser.java @@ -31,11 +31,11 @@ import java.util.Iterator; import java.util.Set; @SuppressWarnings("WeakerAccess") -public class XCJFQParser extends QParser { +public class CrossCollectionJoinQParser extends QParser { public static final String ZK_HOST = "zkHost"; public static final String SOLR_URL = "solrUrl"; - public static final String COLLECTION = "collection"; + public static final String FROM_INDEX = "fromIndex"; public static final String FROM = "from"; public static final String TO = "to"; public static final String ROUTED_BY_JOIN_KEY = "routed"; @@ -44,16 +44,17 @@ public class XCJFQParser extends QParser { public static final int TTL_DEFAULT = 60 * 60; // in seconds private static final Set OWN_PARAMS = new HashSet<>(Arrays.asList( - QueryParsing.TYPE, QueryParsing.V, ZK_HOST, SOLR_URL, COLLECTION, FROM, TO, ROUTED_BY_JOIN_KEY, TTL)); + QueryParsing.TYPE, QueryParsing.V, ZK_HOST, SOLR_URL, FROM_INDEX, FROM, TO, ROUTED_BY_JOIN_KEY, TTL)); private final String routerField; - private final Set solrUrlWhitelist; + private final Set allowSolrUrls; - public XCJFQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req, String routerField, Set solrUrlWhiteList) { + public CrossCollectionJoinQParser(String qstr, 
SolrParams localParams, SolrParams params, SolrQueryRequest req, + String routerField, Set allowSolrUrls) { super(qstr, localParams, params, req); this.routerField = routerField; // If specified in the config, this will limit which solr url's the parser can connect to. - this.solrUrlWhitelist = solrUrlWhiteList; + this.allowSolrUrls = allowSolrUrls; } @Override @@ -63,17 +64,18 @@ public class XCJFQParser extends QParser { String solrUrl = localParams.get(SOLR_URL); // Test if this is a valid solr url. if (solrUrl != null) { - if (solrUrlWhitelist == null) { - throw new SyntaxError("White list must be configured to use solrUrl parameter."); + if (allowSolrUrls == null) { + throw new SyntaxError("allowSolrUrls list must be configured to use solrUrl parameter."); } - if (!solrUrlWhitelist.contains(solrUrl)) { - throw new SyntaxError("Solr Url was not in the whitelist. Please check your configuration."); + if (!allowSolrUrls.contains(solrUrl)) { + throw new SyntaxError("Solr URL was not in allowSolrUrls list. 
Please check your configuration."); } } - String collection = localParams.get(COLLECTION); + String collection = localParams.get(FROM_INDEX); String fromField = localParams.get(FROM); String toField = localParams.get(TO); + boolean routedByJoinKey = localParams.getBool(ROUTED_BY_JOIN_KEY, toField.equals(routerField)); int ttl = localParams.getInt(TTL, TTL_DEFAULT); @@ -85,6 +87,6 @@ public class XCJFQParser extends QParser { } } - return new XCJFQuery(query, zkHost, solrUrl, collection, fromField, toField, routedByJoinKey, ttl, otherParams); + return new CrossCollectionJoinQuery(query, zkHost, solrUrl, collection, fromField, toField, routedByJoinKey, ttl, otherParams); } } diff --git a/solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java b/solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQuery.java similarity index 94% rename from solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java rename to solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQuery.java index fbe12e8b39d..16bf6e29aa4 100644 --- a/solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java +++ b/solr/core/src/java/org/apache/solr/search/join/CrossCollectionJoinQuery.java @@ -64,7 +64,7 @@ import org.apache.solr.search.DocSetUtil; import org.apache.solr.search.Filter; import org.apache.solr.search.SolrIndexSearcher; -public class XCJFQuery extends Query { +public class CrossCollectionJoinQuery extends Query { protected final String query; protected final String zkHost; @@ -80,8 +80,9 @@ public class XCJFQuery extends Query { protected SolrParams otherParams; protected String otherParamsString; - public XCJFQuery(String query, String zkHost, String solrUrl, String collection, String fromField, String toField, - boolean routedByJoinKey, int ttl, SolrParams otherParams) { + public CrossCollectionJoinQuery(String query, String zkHost, String solrUrl, + String collection, String fromField, String toField, + boolean routedByJoinKey, int ttl, SolrParams 
otherParams) { this.query = query; this.zkHost = zkHost; @@ -173,14 +174,14 @@ public class XCJFQuery extends Query { } } - private class XCJFQueryWeight extends ConstantScoreWeight { + private class CrossCollectionJoinQueryWeight extends ConstantScoreWeight { private SolrIndexSearcher searcher; private ScoreMode scoreMode; private Filter filter; - public XCJFQueryWeight(SolrIndexSearcher searcher, ScoreMode scoreMode, float score) { - super(XCJFQuery.this, score); + public CrossCollectionJoinQueryWeight(SolrIndexSearcher searcher, ScoreMode scoreMode, float score) { + super(CrossCollectionJoinQuery.this, score); this.scoreMode = scoreMode; this.searcher = searcher; } @@ -329,7 +330,7 @@ public class XCJFQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - return new XCJFQueryWeight((SolrIndexSearcher) searcher, scoreMode, boost); + return new CrossCollectionJoinQueryWeight((SolrIndexSearcher) searcher, scoreMode, boost); } @Override @@ -359,7 +360,7 @@ public class XCJFQuery extends Query { equalsTo(getClass().cast(other)); } - private boolean equalsTo(XCJFQuery other) { + private boolean equalsTo(CrossCollectionJoinQuery other) { return Objects.equals(query, other.query) && Objects.equals(zkHost, other.zkHost) && Objects.equals(solrUrl, other.solrUrl) && diff --git a/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java index 7bd78c0683f..423fd25f560 100644 --- a/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/join/ScoreJoinQParserPlugin.java @@ -309,7 +309,7 @@ public class ScoreJoinQParserPlugin extends QParserPlugin { for (Slice slice : zkController.getClusterState().getCollection(fromIndex).getActiveSlicesArr()) { if (fromReplica != null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - 
"SolrCloud join: multiple shards not yet supported " + fromIndex); + "SolrCloud join: To join with a sharded collection, use method=crossCollection."); for (Replica replica : slice.getReplicas()) { if (replica.getNodeName().equals(nodeName)) { @@ -327,8 +327,7 @@ public class ScoreJoinQParserPlugin extends QParserPlugin { if (fromReplica == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "SolrCloud join: No active replicas for "+fromIndex+ - " found in node " + nodeName); + "SolrCloud join: To join with a collection that might not be co-located, use method=crossCollection."); return fromReplica; } diff --git a/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java deleted file mode 100644 index a7300e6b112..00000000000 --- a/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.solr.search.join; - -import java.util.HashSet; -import java.util.List; - -import org.apache.solr.common.StringUtils; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.search.QParser; -import org.apache.solr.search.QParserPlugin; - -/** - * Cross-collection join filter. Runs a query against a remote Solr collection to obtain a - * set of join keys, then applies that set of join keys as a filter against the local collection. - *
Example: {!xcjf collection="remoteCollection" from="fromField" to="toField" v="*:*"} - */ -public class XCJFQParserPlugin extends QParserPlugin { - - public static final String NAME = "xcjf"; - - private String routerField; - private HashSet solrUrlWhitelist; - - @Override - public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { - return new XCJFQParser(qstr, localParams, params, req, routerField, solrUrlWhitelist); - } - - @Override - @SuppressWarnings({"unchecked"}) - public void init(@SuppressWarnings({"rawtypes"})NamedList args) { - routerField = (String) args.get("routerField"); - solrUrlWhitelist = new HashSet<>(); - if (args.get("solrUrl") != null) { - for (String s : (List) args.get("solrUrl")) { - if (!StringUtils.isEmpty(s)) - solrUrlWhitelist.add(s); - } - } else { - solrUrlWhitelist = null; - } - } - - @Override - public String getName() { - return NAME; - } -} diff --git a/solr/core/src/test-files/solr/configsets/xcjf/conf/schema.xml b/solr/core/src/test-files/solr/configsets/ccjoin/conf/schema.xml similarity index 100% rename from solr/core/src/test-files/solr/configsets/xcjf/conf/schema.xml rename to solr/core/src/test-files/solr/configsets/ccjoin/conf/schema.xml diff --git a/solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/ccjoin/conf/solrconfig.xml similarity index 70% rename from solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml rename to solr/core/src/test-files/solr/configsets/ccjoin/conf/solrconfig.xml index 942e7f5db23..323d9f57b0e 100644 --- a/solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/configsets/ccjoin/conf/solrconfig.xml @@ -48,25 +48,15 @@ initialSize="0" regenerator="solr.NoOpRegenerator"/> - + product_id_s - - ${test.xcjf.solr.url.1:} - ${test.xcjf.solr.url.2:} - ${test.xcjf.solr.url.3:} - - - - - - - product_id_s - - ${test.xcjf.solr.url.1:} - 
${test.xcjf.solr.url.2:} - ${test.xcjf.solr.url.3:} + + ${test.ccjoin.solr.url.1:} + ${test.ccjoin.solr.url.2:} + ${test.ccjoin.solr.url.3:} + diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index a44d51d7b92..a21eed2e861 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -1293,12 +1293,6 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { ); } - public void testXCJFQuery() throws Exception { - assertQueryEquals("xcjf", - "{!xcjf collection=abc from=x_id to=x_id}*:*", - "{!xcjf collection=abc from=x_id to=x_id v='*:*'}"); - } - public void testHashRangeQuery() throws Exception { assertQueryEquals("hash_range", "{!hash_range f=x_id l=107347968 u=214695935}", diff --git a/solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/CrossCollectionJoinQueryTest.java similarity index 68% rename from solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java rename to solr/core/src/test/org/apache/solr/search/join/CrossCollectionJoinQueryTest.java index c58ccb56edd..ebdb960164a 100644 --- a/solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java +++ b/solr/core/src/test/org/apache/solr/search/join/CrossCollectionJoinQueryTest.java @@ -36,7 +36,7 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.junit.BeforeClass; import org.junit.Test; -public class XCJFQueryTest extends SolrCloudTestCase { +public class CrossCollectionJoinQueryTest extends SolrCloudTestCase { private static final int NUM_NODES = 3; private static final int NUM_SHARDS = 3; @@ -48,15 +48,15 @@ public class XCJFQueryTest extends SolrCloudTestCase { @BeforeClass public static void setupCluster() throws Exception { configureCluster(NUM_NODES) - .addConfig("xcjf", configset("xcjf")) + .addConfig("ccjoin", 
configset("ccjoin")) .withSolrXml(TEST_PATH().resolve("solr.xml")) .configure(); - CollectionAdminRequest.createCollection("products", "xcjf", NUM_SHARDS, NUM_REPLICAS) + CollectionAdminRequest.createCollection("products", "ccjoin", NUM_SHARDS, NUM_REPLICAS) .process(cluster.getSolrClient()); - CollectionAdminRequest.createCollection("parts", "xcjf", NUM_SHARDS, NUM_REPLICAS) + CollectionAdminRequest.createCollection("parts", "ccjoin", NUM_SHARDS, NUM_REPLICAS) .process(cluster.getSolrClient()); } @@ -127,100 +127,100 @@ public class XCJFQueryTest extends SolrCloudTestCase { } @Test - public void testXcjfRoutedCollection() throws Exception { + public void testCcJoinRoutedCollection() throws Exception { setupIndexes(true); - testXcjfQuery("{!xcjf collection=products from=product_id_i to=product_id_i}size_s:M",true); + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_i to=product_id_i}size_s:M", true); int i = 0; for (JettySolrRunner runner : cluster.getJettySolrRunners()) { i++; String url = runner.getBaseUrl().toString(); - System.setProperty("test.xcjf.solr.url." + i, url); + System.setProperty("test.ccjoin.solr.url." + i, url); } try { // now we need to re-upload our config , now that we know a valid solr url for the cluster. CloudSolrClient client = cluster.getSolrClient(); - ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("xcjf"), "xcjf"); - // reload the cores with the updated whitelisted solr url config. + ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("ccjoin"), "ccjoin"); + // reload the cores with the updated allowSolrUrls config. 
CollectionAdminRequest.Reload.reloadCollection("products").process(client); CollectionAdminRequest.Reload.reloadCollection("parts").process(client); Thread.sleep(10000); - testXcjfQuery("{!xcjf collection=products from=product_id_i to=product_id_i}size_s:M",true); + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_i to=product_id_i}size_s:M", true); - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", getSolrUrl()), + testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_i to=product_id_i}size_s:M", getSolrUrl()), true); - testXcjfQuery("{!xcjf collection=products from=product_id_l to=product_id_l}size_s:M", + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_l to=product_id_l}size_s:M", true); - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf solrUrl=\"%s\" collection=products from=product_id_l to=product_id_l}size_s:M", + testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_l to=product_id_l}size_s:M", getSolrUrl()), true); - testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s}size_s:M", + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_s to=product_id_s}size_s:M", true); - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf solrUrl=\"%s\" collection=products from=product_id_s to=product_id_s}size_s:M", + testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_s to=product_id_s}size_s:M", getSolrUrl()), true); - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf zkHost=\"%s\" collection=products from=product_id_s to=product_id_s}size_s:M", + testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection zkHost=\"%s\" fromIndex=products 
from=product_id_s to=product_id_s}size_s:M", cluster.getSolrClient().getZkHost()), true); - // Test the ability to set other parameters on xcjf and have them passed through + // Test the ability to set other parameters on crossCollection join and have them passed through assertResultCount("parts", - "{!xcjf collection=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", + "{!join method=crossCollection fromIndex=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", 2, true); assertResultCount("parts", String.format(Locale.ROOT, - "{!xcjf solrUrl=\"%s\" collection=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", getSolrUrl()), 2, true); } finally { for (JettySolrRunner runner : cluster.getJettySolrRunners()) { i++; - System.getProperties().remove("test.xcjf.solr.url." + i); + System.getProperties().remove("test.ccjoin.solr.url." + i); } } } @Test - public void testXcjfNonroutedCollection() throws Exception { + public void testCcJoinNonroutedCollection() throws Exception { setupIndexes(false); // This query will expect the collection to have been routed on product_id, so it should return // incomplete results. - testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s}size_s:M", + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_s to=product_id_s}size_s:M", false); // Now if we set routed=false we should get a complete set of results. 
- testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s routed=false}size_s:M", + testCcJoinQuery("{!join method=crossCollection fromIndex=products from=product_id_s to=product_id_s routed=false}size_s:M", true); - // The xcjf_nonrouted query parser doesn't assume that the collection was routed on product_id, + // The join_nonrouted query parser doesn't assume that the collection was routed on product_id, // so we should get the full set of results. - testXcjfQuery("{!xcjf_nonrouted collection=products from=product_id_s to=product_id_s}size_s:M", + testCcJoinQuery("{!join_nonrouted method=crossCollection fromIndex=products from=product_id_s to=product_id_s}size_s:M", true); // But if we set routed=true, we are now assuming again that the collection was routed on product_id, // so we should get incomplete results. - testXcjfQuery("{!xcjf_nonrouted collection=products from=product_id_s to=product_id_s routed=true}size_s:M", + testCcJoinQuery("{!join_nonrouted method=crossCollection fromIndex=products from=product_id_s to=product_id_s routed=true}size_s:M", false); } @Test - public void testSolrUrlWhitelist() throws Exception { + public void testAllowSolrUrlsList() throws Exception { setupIndexes(false); - // programmatically add the current jetty solr url to the solrUrl whitelist property in the solrconfig.xml + // programmatically add the current jetty solr url to the allowSolrUrls property in the solrconfig.xml int i = 0; for (JettySolrRunner runner : cluster.getJettySolrRunners()) { i++; - System.setProperty("test.xcjf.solr.url." + i, runner.getBaseUrl().toString()); + System.setProperty("test.ccjoin.solr.url." + i, runner.getBaseUrl().toString()); } try { // now we need to re-upload our config , now that we know a valid solr url for the cluster. 
CloudSolrClient client = cluster.getSolrClient(); - ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("xcjf"), "xcjf"); - // reload the cores with the updated whitelisted solr url config. + ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("ccjoin"), "ccjoin"); + // reload the cores with the updated allowSolrUrls config. CollectionAdminRequest.Reload.reloadCollection("products").process(client); CollectionAdminRequest.Reload.reloadCollection("parts").process(client); @@ -232,33 +232,33 @@ public class XCJFQueryTest extends SolrCloudTestCase { // we expect an exception because bogus url isn't valid. try { // This should throw an exception. - // verify the xcfj_whitelist definition has the current valid urls and works. - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf_whitelist solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", + // verify the join plugin definition has the current valid urls and works. + testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_i to=product_id_i}size_s:M", "http://bogus.example.com:8983/solr"), true); fail("The query invovling bogus.example.com should not succeed"); } catch (Exception e) { // should get here. String message = e.getMessage(); - assertTrue("message was " + message, message.contains("SyntaxError: Solr Url was not in the whitelist")); + assertTrue("message was " + message, message.contains("SyntaxError: Solr URL was not in allowSolrUrls list")); } - // verify the xcfj_whitelist definition has the current valid urls and works. - testXcjfQuery(String.format(Locale.ROOT, - "{!xcjf_whitelist solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", + // verify the join plugin definition has the current valid urls and works. 
+ testCcJoinQuery(String.format(Locale.ROOT, + "{!join method=crossCollection solrUrl=\"%s\" fromIndex=products from=product_id_i to=product_id_i}size_s:M", getSolrUrl()), true); } finally { for (JettySolrRunner runner : cluster.getJettySolrRunners()) { i++; - System.getProperties().remove("test.xcjf.solr.url." + i); + System.getProperties().remove("test.ccjoin.solr.url." + i); } } } - public void testXcjfQuery(String query, boolean expectFullResults) throws Exception { + public void testCcJoinQuery(String query, boolean expectFullResults) throws Exception { assertResultCount("parts", query, NUM_PRODUCTS / 2, expectFullResults); } diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index 7cd93ac0040..e41b1d18256 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -551,9 +551,9 @@ http://localhost:8983/solr/alt_graph/query?fl=id&q={!graph+from=id+to=out_edge+m ---- == Hash Range Query Parser -The hash range query parser will return documents that have a field that contains a value that would be hashed to a particular range. This is used by the XCJF query parser. This query parser has a per segment cache for each field that this query parser will operate on. +The hash range query parser will return documents that have a field that contains a value that would be hashed to a particular range. This is used by the join query when using method=crossCollection. The hash range query parser has a per segment cache for each field that this query parser will operate on. -When specifying a min/max hash range and a field name with the hash range query parser, only documents who contain a field value that hashes into that range will be returned. If you want to query for a very large result set, you can query for various hash ranges to return a fraction of the documents with each range request.
In the XCJF case, the hash_range query parser is used to ensure that each shard only gets the set of join keys that would end up on that shard. +When specifying a min/max hash range and a field name with the hash range query parser, only documents who contain a field value that hashes into that range will be returned. If you want to query for a very large result set, you can query for various hash ranges to return a fraction of the documents with each range request. In the cross collection join case, the hash_range query parser is used to ensure that each shard only gets the set of join keys that would end up on that shard. This query parser uses the MurmurHash3_x86_32. This is the same as the default hashing for the default composite ID router in Solr. @@ -637,7 +637,7 @@ This parameter is required. The name of the index to run the "from" query (`v` parameter) on and where "from" values are gathered. Must be located on the same node as the core processing the request. This parameter is optional; it defaults to the value of the processing core if not specified. -See <<Joining Across Collections>> below for more information. +See <<Joining Across Single Shard Collections>> or <<Cross Collection Join>> below for more information. `score`:: An optional parameter that instructs Solr to return information about the "from" query scores. @@ -681,9 +681,9 @@ But they are also expensive to build and need to be lazily populated after each If you commit frequently and your use-case can tolerate a static warming query, consider adding one to `solrconfig.xml` so that this work is done as a part of the commit itself and not attached directly to user requests. Consider this method when the "from" query matches a large number of documents and the "to" result set is small to moderate in size, but only if sporadic post-commit slowness is tolerable. -=== Joining Across Collections +=== Joining Across Single Shard Collections -You can also specify a `fromIndex` parameter to join with a field from another core or collection.
If running in SolrCloud mode, then the collection specified in the `fromIndex` parameter must have a single shard and a replica on all Solr nodes where the collection you're joining to has a replica. +You can also specify a `fromIndex` parameter to join with a field from another core or a single shard collection. If running in SolrCloud mode, then the collection specified in the `fromIndex` parameter must have a single shard and a replica on all Solr nodes where the collection you're joining to has a replica. Let's consider an example where you want to use a Solr join query to filter movies by directors that have won an Oscar. Specifically, imagine we have two collections with the following fields: @@ -724,6 +724,92 @@ At query time, the `JoinQParser` will access the local replica of the *movie_dir For more information, Erick Erickson has written a blog post about join performance titled https://lucidworks.com/2012/06/20/solr-and-joins/[Solr and Joins]. +=== Cross Collection Join +The Cross Collection Join Filter is a method for the join parser that will execute a query against a remote Solr collection to get back a set of join keys that will be used as a filter query against the local Solr collection. + +The crossCollection join query will create a CrossCollectionQuery object. +The CrossCollectionQuery will first query a remote Solr collection and get back a streaming expression result of the join keys. +As the join keys are streamed to the node, a bitset of the matching documents in the local index is built up. +This avoids keeping the full set of join keys in memory at any given time. +This bitset is then inserted into the filter cache upon successful execution as with the normal behavior of the Solr filter cache. + +If the local index is sharded according to the join key field, the cross collection join can leverage a secondary query parser called the "hash_range" query parser.
+The hash_range query parser is responsible for returning only the documents that hash to a given range of values. +This allows the CrossCollectionQuery to query the remote Solr collection and return only the join keys that would match a specific shard in the local Solr collection. +This has the benefit of making sure that network traffic doesn't increase as the number of shards increases and allows for much greater scalability. + +The CrossCollection join query works with both String and Point types of fields. +The fields that are being used for the join key must be single-valued and have docValues enabled. + +It's advised to shard the local collection by the join key as this allows for the optimization mentioned above to be utilized. + +Cross collection join queries should generally not be used as part of the `q` parameter; they are designed to be used as filter queries (`fq` parameter) to ensure proper caching. + +The remote Solr collection that is being queried should have a single-valued field for the join key with docValues enabled. + +The remote Solr collection does not have any specific sharding requirements. + +==== Join Query Parser Definition in solrconfig.xml + +The cross collection join has some configuration options that can be specified in `solrconfig.xml`. + +`routerField`:: +If the documents are routed to shards using the CompositeID router by the join field, then that field name should be specified in the configuration here. This will allow the parser to optimize the resulting HashRange query. + +`allowSolrUrls`:: +If specified, this array of strings specifies the allowed Solr URLs that you can pass to the `solrUrl` query parameter. Without this configuration the `solrUrl` parameter cannot be used. This restriction is necessary to prevent an attacker from using Solr to explore the network.
+ +[source,xml] +---- +<queryParser name="join" class="org.apache.solr.search.JoinQParserPlugin"> + <str name="routerField">product_id_s</str> + <arr name="allowSolrUrls"> + <str>http://othersolr.example.com:8983/solr</str> + </arr> +</queryParser> +---- + +==== Cross Collection Join Query Parameters + +`fromIndex`:: +The name of the external Solr collection to be queried to retrieve the set of join key values (required). + +`zkHost`:: +The connection string to be used to connect to ZooKeeper. `zkHost` and `solrUrl` are both optional parameters, and at most one of them should be specified. If neither `zkHost` nor `solrUrl` are specified, the local ZooKeeper cluster will be used. (optional). + +`solrUrl`:: +The URL of the external Solr node to be queried. Must be a character-for-character exact match of a URL in the configured `allowSolrUrls` list. (optional, disabled by default for security). + +`from`:: +The join key field name in the external collection (required). + +`to`:: +The join key field name in the local collection. + +`v`:: +The query substituted in as a local param. This is the query string that will match documents in the remote collection. + +`routed`:: +If `true`, the cross collection join query will use each shard's hash range to determine the set of join keys to retrieve for that shard. +This parameter improves the performance of the cross-collection join, but it depends on the local collection being routed by the `to` field. +If this parameter is not specified, the cross collection join query will try to determine the correct value automatically. + +`ttl`:: +The length of time that a cross collection join query in the cache will be considered valid, in seconds. +Defaults to `3600` (one hour). +The cross collection join query will not be aware of changes to the remote collection, so if the remote collection is updated, cached cross collection queries may give inaccurate results. +After the `ttl` period has expired, the cross collection join query will re-execute the join against the remote collection. + +Other Parameters:: +Any normal Solr query parameter can also be specified/passed through as a local param.
+ +==== Cross Collection Query Examples + +[source,text] +---- +http://localhost:8983/solr/localCollection/query?fl=id&q={!join method="crossCollection" fromIndex="otherCollection" from="fromField" to="toField" v="*:*"} +---- + == Lucene Query Parser The `LuceneQParser` extends the `QParserPlugin` by parsing Solr's variant on the Lucene QueryParser syntax. This is effectively the same query parser that is used in Lucene. It uses the operators `q.op`, the default operator ("OR" or "AND") and `df`, the default field name. @@ -1116,94 +1202,6 @@ An optional parameter used to determine which of several query implementations s {!terms f=categoryId method=booleanQuery separator=" "}8 6 7 5309 ---- -== XCJF Query Parser -The Cross Collection Join Filter (XCJF) is a query parser plugin that will execute a query against a remote Solr collection to get back a set of join keys that will be used to as a filter query against the local Solr collection. - -The XCJF parser will create an XCJFQuery object. -The XCJFQuery will first query a remote Solr collection and get back a streaming expression result of the join keys. -As the join keys are streamed to the node, a bitset of the matching documents in the local index is built up. -This avoids keeping the full set of join keys in memory at any given time. -This bitset is then inserted into the filter cache upon successful execution as with the normal behavior of the Solr filter cache. - -If the local index is sharded according to the join key field, the XCJF parser can leverage a secondary query parser called the "hash_range" query parser. -The hash_range query parser is responsible for returning only the documents that hash to a given range of values. -This allows the XCJFQuery to query the remote Solr collection and return only the join keys that would match a specific shard in the local Solr collection. 
-This has the benefit of making sure that network traffic doesn't increase as the number of shards increases and allows for much greater scalability. - -The XCJF parser works with both String and Point types of fields. -The fields that are being used for the join key must be single-valued and have docValues enabled. - -It's advised to shard the local collection by the join key as this allows for the optimization mentioned above to be utilized. - -The XCJF parser should not be generally used as part of the `q` parameter, but rather it is designed to be used as a filter query (`fq` parameter) to ensure proper caching. - -The remote Solr collection that is being queried should have a single-valued field for the join key with docValues enabled. - -The remote Solr collection does not have any specific sharding requirements. - -=== XCJF Query Parser Definition in solrconfig.xml - -The XCJF has some configuration options that can be specified in `solrconfig.xml`. - -`routerField`:: -If the documents are routed to shards using the CompositeID router by the join field, then that field name should be specified in the configuration here. This will allow the parser to optimize the resulting HashRange query. - -`solrUrl`:: -If specified, this array of strings specifies the white listed Solr URLs that you can pass to the solrUrl query parameter. Without this configuration the solrUrl parameter cannot be used. This restriction is necessary to prevent an attacker from using Solr to explore the network. - -[source,xml] ----- - - - joinfield_id_s - - - http://othersolr.example.com:8983/solr - - ----- - -=== XCJF Query Parameters - -`collection`:: -The name of the external Solr collection to be queried to retrieve the set of join key values (required). - -`zkHost`:: -The connection string to be used to connect to ZooKeeper. `zkHost` and `solrUrl` are both optional parameters, and at most one of them should be specified. 
If neither `zkHost` nor `solrUrl` are specified, the local ZooKeeper cluster will be used. (optional). - -`solrUrl`:: -The URL of the external Solr node to be queried. Must be a character for character exact match of a whitelisted url. (optional, disabled by default for security). - -`from`:: -The join key field name in the external collection (required). - -`to`:: -The join key field name in the local collection. - -`v`:: -The query substituted in as a local param. This is the query string that will match documents in the remote collection. - -`routed`:: -If `true`, the XCJF query will use each shard's hash range to determine the set of join keys to retrieve for that shard. -This parameter improves the performance of the cross-collection join, but it depends on the local collection being routed by the `to` field. -If this parameter is not specified, the XCJF query will try to determine the correct value automatically. - -`ttl`:: -The length of time that an XCJF query in the cache will be considered valid, in seconds. -Defaults to `3600` (one hour). -The XCJF query will not be aware of changes to the remote collection, so if the remote collection is updated, cached XCJF queries may give inaccurate results. -After the `ttl` period has expired, the XCJF query will re-execute the join against the remote collection. - -Other Parameters:: -Any normal Solr query parameter can also be specified/passed through as a local param. - -=== XCJF Query Examples - -[source,text] ----- -http://localhost:8983/solr/localCollection/query?fl=id&q={!xcjf collection="otherCollection" from="fromField" to="toField" v="*:*"} ----- - == XML Query Parser The {solr-javadocs}/solr-core/org/apache/solr/search/XmlQParserPlugin.html[XmlQParserPlugin] extends the {solr-javadocs}/solr-core/org/apache/solr/search/QParserPlugin.html[QParserPlugin] and supports the creation of queries from XML. Example: