diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index fceca330d4f..55e7f24615d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -81,6 +81,8 @@ New Features * SOLR-14130: Add postlogs command line tool for indexing Solr logs (Joel Bernstein) + * SOLR-13749: New cross collection join filter (XCJF) (Dan Fox, Kevin Watters, via Gus Heck) + Improvements --------------------- * SOLR-14120: Define JavaScript methods 'includes' and 'startsWith' to ensure AdminUI can be displayed when using diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index d602c7aab9f..d17422cc760 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -29,6 +29,8 @@ import org.apache.solr.search.join.BlockJoinChildQParserPlugin; import org.apache.solr.search.join.BlockJoinParentQParserPlugin; import org.apache.solr.search.join.FiltersQParserPlugin; import org.apache.solr.search.join.GraphQParserPlugin; +import org.apache.solr.search.join.HashRangeQParserPlugin; +import org.apache.solr.search.join.XCJFQParserPlugin; import org.apache.solr.search.mlt.MLTQParserPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin; @@ -85,6 +87,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI map.put(PayloadCheckQParserPlugin.NAME, new PayloadCheckQParserPlugin()); map.put(BoolQParserPlugin.NAME, new BoolQParserPlugin()); map.put(MinHashQParserPlugin.NAME, new MinHashQParserPlugin()); + map.put(XCJFQParserPlugin.NAME, new XCJFQParserPlugin()); + map.put(HashRangeQParserPlugin.NAME, new HashRangeQParserPlugin()); standardPlugins = Collections.unmodifiableMap(map); } diff --git a/solr/core/src/java/org/apache/solr/search/join/HashRangeQParser.java b/solr/core/src/java/org/apache/solr/search/join/HashRangeQParser.java new file mode 100644 index 00000000000..a427160182b --- /dev/null +++ 
b/solr/core/src/java/org/apache/solr/search/join/HashRangeQParser.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.join; + +import org.apache.lucene.search.Query; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.SyntaxError; + +@SuppressWarnings("WeakerAccess") +public class HashRangeQParser extends QParser { + + public static final String FIELD = "f"; + public static final String LOWER_BOUND = "l"; + public static final String UPPER_BOUND = "u"; + + public HashRangeQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + String field = localParams.get(FIELD); + int lower = localParams.getInt(LOWER_BOUND); + int upper = localParams.getInt(UPPER_BOUND); + + return new HashRangeQuery(field, lower, upper); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/HashRangeQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/join/HashRangeQParserPlugin.java new file mode 100644 index 
00000000000..069860a9df7 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/HashRangeQParserPlugin.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.join; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; + +/** + * Matches documents where the specified field hashes to a value within the given range. + *
Can be used to create a filter that will only match documents falling within a certain shard's hash range. + */ +public class HashRangeQParserPlugin extends QParserPlugin { + + public static final String NAME = "hash_range"; + + @Override + public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + return new HashRangeQParser(qstr, localParams, params, req); + } + + @Override + public String getName() { + return NAME; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/HashRangeQuery.java b/solr/core/src/java/org/apache/solr/search/join/HashRangeQuery.java new file mode 100644 index 00000000000..d8a339b0921 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/HashRangeQuery.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search.join; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.util.Hash; +import org.apache.solr.search.SolrCache; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.IOException; +import java.util.Locale; +import java.util.Objects; + +public class HashRangeQuery extends Query { + + protected final String field; + protected final int lower; + protected final int upper; + + public static final String CACHE_KEY_PREFIX = "hash_"; + + public HashRangeQuery(String field, int lower, int upper) { + this.field = field; + this.lower = lower; + this.upper = upper; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + + @Override + public boolean isCacheable(LeafReaderContext context) { + return DocValues.isCacheable(context, field); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedDocValues docValues = context.reader().getSortedDocValues(field); + int[] cache = getCache(context); + + TwoPhaseIterator iterator = new TwoPhaseIterator(docValues) { + @Override + public boolean matches() throws IOException { + int hash = cache != null ? cache[docValues.docID()] : hash(docValues); + return hash >= lower && hash <= upper; + } + + @Override + public float matchCost() { + return cache != null ? 
2 : 100; + } + }; + + return new ConstantScoreScorer(this, boost, scoreMode, iterator); + } + + private int[] getCache(LeafReaderContext context) throws IOException { + IndexReader.CacheHelper cacheHelper = context.reader().getReaderCacheHelper(); + if (cacheHelper == null) { + return null; + } + @SuppressWarnings("unchecked") + final SolrCache cache = + ((SolrIndexSearcher) searcher).getCache(CACHE_KEY_PREFIX + field); + if (cache == null) { + return null; + } + + IndexReader.CacheKey cacheKey = cacheHelper.getKey(); + synchronized (cacheKey) { + int[] hashes = cache.get(cacheKey); + if (hashes == null) { + hashes = new int[context.reader().maxDoc()]; + SortedDocValues docValues = context.reader().getSortedDocValues(field); + int doc; + while ((doc = docValues.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + hashes[doc] = hash(docValues); + } + cache.put(cacheKey, hashes); + } + return hashes; + } + } + + private int hash(SortedDocValues docValues) throws IOException { + BytesRef bytesRef = docValues.binaryValue(); + return Hash.murmurhash3_x86_32(bytesRef.bytes, bytesRef.offset, bytesRef.length, 0); + } + }; + } + + @Override + public void visit(QueryVisitor visitor) { + visitor.visitLeaf(this); + } + + @Override + public String toString(String field) { + return String.format(Locale.ROOT, "{!hash_range f=%s l=%d u=%d}", this.field, lower, upper); + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && + equalsTo(getClass().cast(other)); + } + + private boolean equalsTo(HashRangeQuery other) { + return Objects.equals(field, other.field) && + Objects.equals(lower, other.lower) && + Objects.equals(upper, other.upper); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = classHash(); + result = prime * result + Objects.hashCode(field); + result = prime * result + Objects.hashCode(lower); + result = prime * result + Objects.hashCode(upper); + return result; + } +} diff --git 
a/solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java b/solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java new file mode 100644 index 00000000000..9be3c3d651b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/XCJFQParser.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search.join; + +import org.apache.lucene.search.Query; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryParsing; +import org.apache.solr.search.SyntaxError; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +@SuppressWarnings("WeakerAccess") +public class XCJFQParser extends QParser { + + public static final String ZK_HOST = "zkHost"; + public static final String SOLR_URL = "solrUrl"; + public static final String COLLECTION = "collection"; + public static final String FROM = "from"; + public static final String TO = "to"; + public static final String ROUTED_BY_JOIN_KEY = "routed"; + public static final String TTL = "ttl"; + + public static final int TTL_DEFAULT = 60 * 60; // in seconds + + private static final Set OWN_PARAMS = new HashSet<>(Arrays.asList( + QueryParsing.TYPE, QueryParsing.V, ZK_HOST, SOLR_URL, COLLECTION, FROM, TO, ROUTED_BY_JOIN_KEY, TTL)); + + private final String routerField; + private final Set solrUrlWhitelist; + + public XCJFQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req, String routerField, Set solrUrlWhiteList) { + super(qstr, localParams, params, req); + this.routerField = routerField; + // If specified in the config, this will limit which solr url's the parser can connect to. + this.solrUrlWhitelist = solrUrlWhiteList; + } + + @Override + public Query parse() throws SyntaxError { + String query = localParams.get(QueryParsing.V); + String zkHost = localParams.get(ZK_HOST); + String solrUrl = localParams.get(SOLR_URL); + // Test if this is a valid solr url. 
+ if (solrUrl != null) { + if (solrUrlWhitelist == null) { + throw new SyntaxError("White list must be configured to use solrUrl parameter."); + } + if (!solrUrlWhitelist.contains(solrUrl)) { + throw new SyntaxError("Solr Url was not in the whitelist. Please check your configuration."); + } + } + + String collection = localParams.get(COLLECTION); + String fromField = localParams.get(FROM); + String toField = localParams.get(TO); + boolean routedByJoinKey = localParams.getBool(ROUTED_BY_JOIN_KEY, toField.equals(routerField)); + int ttl = localParams.getInt(TTL, TTL_DEFAULT); + + ModifiableSolrParams otherParams = new ModifiableSolrParams(); + for (Iterator it = localParams.getParameterNamesIterator(); it.hasNext(); ) { + String paramName = it.next(); + if (!OWN_PARAMS.contains(paramName)) { + otherParams.set(paramName, localParams.getParams(paramName)); + } + } + + return new XCJFQuery(query, zkHost, solrUrl, collection, fromField, toField, routedByJoinKey, ttl, otherParams); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java new file mode 100644 index 00000000000..d8323915f87 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/XCJFQParserPlugin.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.join; + +import java.util.HashSet; +import java.util.List; + +import org.apache.solr.common.StringUtils; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; + +/** + * Cross-collection join filter. Runs a query against a remote Solr collection to obtain a + * set of join keys, then applies that set of join keys as a filter against the local collection. + *
Example: {!xcjf collection="remoteCollection" from="fromField" to="toField" v="*:*"} + */ +public class XCJFQParserPlugin extends QParserPlugin { + + public static final String NAME = "xcjf"; + + private String routerField; + private HashSet solrUrlWhitelist; + + @Override + public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + return new XCJFQParser(qstr, localParams, params, req, routerField, solrUrlWhitelist); + } + + @Override + public void init(NamedList args) { + routerField = (String) args.get("routerField"); + solrUrlWhitelist = new HashSet<>(); + if (args.get("solrUrl") != null) { + //noinspection unchecked + for (String s : (List) args.get("solrUrl")) { + if (!StringUtils.isEmpty(s)) + solrUrlWhitelist.add(s); + } + } else { + solrUrlWhitelist = null; + } + } + + @Override + public String getName() { + return NAME; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java b/solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java new file mode 100644 index 00000000000..e6d3409020d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/join/XCJFQuery.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search.join; + +import java.io.IOException; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.client.solrj.io.SolrClientCache; +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.eq.FieldEqualitor; +import org.apache.solr.client.solrj.io.stream.CloudSolrStream; +import org.apache.solr.client.solrj.io.stream.SolrStream; +import org.apache.solr.client.solrj.io.stream.StreamContext; +import org.apache.solr.client.solrj.io.stream.TupleStream; +import org.apache.solr.client.solrj.io.stream.UniqueStream; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter; +import org.apache.solr.cloud.CloudDescriptor; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocRouter; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.schema.FieldType; +import 
org.apache.solr.search.BitDocSet; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.DocSetUtil; +import org.apache.solr.search.Filter; +import org.apache.solr.search.SolrIndexSearcher; + +public class XCJFQuery extends Query { + + protected final String query; + protected final String zkHost; + protected final String solrUrl; + protected final String collection; + protected final String fromField; + protected final String toField; + protected final boolean routedByJoinKey; + + protected final long timestamp; + protected final int ttl; + + protected SolrParams otherParams; + protected String otherParamsString; + + public XCJFQuery(String query, String zkHost, String solrUrl, String collection, String fromField, String toField, + boolean routedByJoinKey, int ttl, SolrParams otherParams) { + + this.query = query; + this.zkHost = zkHost; + this.solrUrl = solrUrl; + this.collection = collection; + this.fromField = fromField; + this.toField = toField; + this.routedByJoinKey = routedByJoinKey; + + this.timestamp = System.nanoTime(); + this.ttl = ttl; + + this.otherParams = otherParams; + // SolrParams doesn't implement equals(), so use this string to compare them + if (otherParams != null) { + this.otherParamsString = otherParams.toString(); + } + } + + private interface JoinKeyCollector { + void collect(Object value) throws IOException; + DocSet getDocSet() throws IOException; + } + + private class TermsJoinKeyCollector implements JoinKeyCollector { + + FieldType fieldType; + SolrIndexSearcher searcher; + + TermsEnum termsEnum; + BytesRefBuilder bytes; + PostingsEnum postingsEnum; + + FixedBitSet bitSet; + + public TermsJoinKeyCollector(FieldType fieldType, Terms terms, SolrIndexSearcher searcher) throws IOException { + this.fieldType = fieldType; + this.searcher = searcher; + + termsEnum = terms.iterator(); + bytes = new BytesRefBuilder(); + + bitSet = new FixedBitSet(searcher.maxDoc()); + } + + @Override + public void collect(Object value) throws 
IOException { + fieldType.readableToIndexed((String) value, bytes); + if (termsEnum.seekExact(bytes.get())) { + postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); + bitSet.or(postingsEnum); + } + } + + @Override + public DocSet getDocSet() throws IOException { + if (searcher.getIndexReader().hasDeletions()) { + bitSet.and(searcher.getLiveDocSet().getBits()); + } + return new BitDocSet(bitSet); + } + } + + private class PointJoinKeyCollector extends GraphPointsCollector implements JoinKeyCollector { + + SolrIndexSearcher searcher; + + public PointJoinKeyCollector(SolrIndexSearcher searcher) { + super(searcher.getSchema().getField(toField), null, null); + this.searcher = searcher; + } + + @Override + public void collect(Object value) throws IOException { + if (value instanceof Long || value instanceof Integer) { + set.add(((Number) value).longValue()); + } else { + throw new UnsupportedOperationException("Unsupported field type for XCJFQuery"); + } + } + + @Override + public DocSet getDocSet() throws IOException { + Query query = getResultQuery(searcher.getSchema().getField(toField), false); + if (query == null) { + return DocSet.EMPTY; + } + return DocSetUtil.createDocSet(searcher, query, null); + } + } + + private class XCJFQueryWeight extends ConstantScoreWeight { + + private SolrIndexSearcher searcher; + private ScoreMode scoreMode; + private Filter filter; + + public XCJFQueryWeight(SolrIndexSearcher searcher, ScoreMode scoreMode, float score) { + super(XCJFQuery.this, score); + this.scoreMode = scoreMode; + this.searcher = searcher; + } + + private String createHashRangeFq() { + if (routedByJoinKey) { + ClusterState clusterState = searcher.getCore().getCoreContainer().getZkController().getClusterState(); + CloudDescriptor desc = searcher.getCore().getCoreDescriptor().getCloudDescriptor(); + Slice slice = clusterState.getCollection(desc.getCollectionName()).getSlicesMap().get(desc.getShardId()); + DocRouter.Range range = slice.getRange(); + + // 
In CompositeIdRouter, the routing prefix only affects the top 16 bits + int min = range.min & 0xffff0000; + int max = range.max | 0x0000ffff; + + return String.format(Locale.ROOT, "{!hash_range f=%s l=%d u=%d}", fromField, min, max); + } else { + return null; + } + } + + private TupleStream createCloudSolrStream(SolrClientCache solrClientCache) throws IOException { + String streamZkHost; + if (zkHost != null) { + streamZkHost = zkHost; + } else { + streamZkHost = searcher.getCore().getCoreContainer().getZkController().getZkServerAddress(); + } + + ModifiableSolrParams params = new ModifiableSolrParams(otherParams); + params.set(CommonParams.Q, query); + String fq = createHashRangeFq(); + if (fq != null) { + params.add(CommonParams.FQ, fq); + } + params.set(CommonParams.FL, fromField); + params.set(CommonParams.SORT, fromField + " asc"); + params.set(CommonParams.QT, "/export"); + params.set(CommonParams.WT, CommonParams.JAVABIN); + + StreamContext streamContext = new StreamContext(); + streamContext.setSolrClientCache(solrClientCache); + + TupleStream cloudSolrStream = new CloudSolrStream(streamZkHost, collection, params); + TupleStream uniqueStream = new UniqueStream(cloudSolrStream, new FieldEqualitor(fromField)); + uniqueStream.setStreamContext(streamContext); + return uniqueStream; + } + + private TupleStream createSolrStream() { + StreamExpression searchExpr = new StreamExpression("search") + .withParameter(collection) + .withParameter(new StreamExpressionNamedParameter(CommonParams.Q, query)); + String fq = createHashRangeFq(); + if (fq != null) { + searchExpr.withParameter(new StreamExpressionNamedParameter(CommonParams.FQ, fq)); + } + searchExpr.withParameter(new StreamExpressionNamedParameter(CommonParams.FL, fromField)) + .withParameter(new StreamExpressionNamedParameter(CommonParams.SORT, fromField + " asc")) + .withParameter(new StreamExpressionNamedParameter(CommonParams.QT, "/export")); + + for (Map.Entry entry : otherParams) { + for (String value : 
entry.getValue()) { + searchExpr.withParameter(new StreamExpressionNamedParameter(entry.getKey(), value)); + } + } + + StreamExpression uniqueExpr = new StreamExpression("unique"); + uniqueExpr.withParameter(searchExpr) + .withParameter(new StreamExpressionNamedParameter("over", fromField)); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("expr", uniqueExpr.toString()); + params.set(CommonParams.QT, "/stream"); + params.set(CommonParams.WT, CommonParams.JAVABIN); + + return new SolrStream(solrUrl + "/" + collection, params); + } + + private DocSet getDocSet() throws IOException { + SolrClientCache solrClientCache = new SolrClientCache(); + TupleStream solrStream; + if (zkHost != null || solrUrl == null) { + solrStream = createCloudSolrStream(solrClientCache); + } else { + solrStream = createSolrStream(); + } + + FieldType fieldType = searcher.getSchema().getFieldType(toField); + JoinKeyCollector collector; + if (fieldType.isPointField()) { + collector = new PointJoinKeyCollector(searcher); + } else { + Terms terms = searcher.getSlowAtomicReader().terms(toField); + if (terms == null) { + return DocSet.EMPTY; + } + collector = new TermsJoinKeyCollector(fieldType, terms, searcher); + } + + try { + solrStream.open(); + while (true) { + Tuple tuple = solrStream.read(); + if (tuple.EXCEPTION) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, tuple.getException()); + } + if (tuple.EOF) { + break; + } + + Object value = tuple.get(fromField); + collector.collect(value); + } + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } finally { + solrStream.close(); + solrClientCache.close(); + } + + return collector.getDocSet(); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + if (filter == null) { + filter = getDocSet().getTopFilter(); + } + + DocIdSet readerSet = filter.getDocIdSet(context, null); + if (readerSet == null) { + return null; + } + 
DocIdSetIterator readerSetIterator = readerSet.iterator(); + if (readerSetIterator == null) { + return null; + } + return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new XCJFQueryWeight((SolrIndexSearcher) searcher, scoreMode, boost); + } + + @Override + public void visit(QueryVisitor visitor) { + visitor.visitLeaf(this); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = classHash(); + result = prime * result + Objects.hashCode(query); + result = prime * result + Objects.hashCode(zkHost); + result = prime * result + Objects.hashCode(solrUrl); + result = prime * result + Objects.hashCode(collection); + result = prime * result + Objects.hashCode(fromField); + result = prime * result + Objects.hashCode(toField); + result = prime * result + Objects.hashCode(routedByJoinKey); + result = prime * result + Objects.hashCode(otherParamsString); + // timestamp and ttl should not be included in hash code + return result; + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && + equalsTo(getClass().cast(other)); + } + + private boolean equalsTo(XCJFQuery other) { + return Objects.equals(query, other.query) && + Objects.equals(zkHost, other.zkHost) && + Objects.equals(solrUrl, other.solrUrl) && + Objects.equals(collection, other.collection) && + Objects.equals(fromField, other.fromField) && + Objects.equals(toField, other.toField) && + Objects.equals(routedByJoinKey, other.routedByJoinKey) && + Objects.equals(otherParamsString, other.otherParamsString) && + TimeUnit.SECONDS.convert(Math.abs(timestamp - other.timestamp), TimeUnit.NANOSECONDS) < Math.min(ttl, other.ttl); + } + + @Override + public String toString(String field) { + return String.format(Locale.ROOT, 
"{!xcjf collection=%s from=%s to=%s routed=%b ttl=%d}%s", + collection, fromField, toField, routedByJoinKey, ttl, query.toString()); + } +} diff --git a/solr/core/src/test-files/solr/configsets/xcjf/conf/schema.xml b/solr/core/src/test-files/solr/configsets/xcjf/conf/schema.xml new file mode 100644 index 00000000000..7afc9a5d48b --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/xcjf/conf/schema.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + id + + diff --git a/solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml new file mode 100644 index 00000000000..b7a13796010 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/xcjf/conf/solrconfig.xml @@ -0,0 +1,72 @@ + + + + + + + ${tests.luceneMatchVersion:LATEST} + + ${solr.data.dir:} + + + + + + + + + + + + + + + product_id_s + + ${test.xcjf.solr.url.1:} + ${test.xcjf.solr.url.2:} + ${test.xcjf.solr.url.3:} + + + + + + + product_id_s + + ${test.xcjf.solr.url.1:} + ${test.xcjf.solr.url.2:} + ${test.xcjf.solr.url.3:} + + + + + diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index fd4e9577f56..00108d7e6ed 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -34,9 +34,9 @@ import org.junit.BeforeClass; /** - * Sanity checks that queries (generated by the QParser and ValueSourceParser - * framework) are appropriately {@link Object#equals} and - * {@link Object#hashCode()} equivalent. If you are adding a new default + * Sanity checks that queries (generated by the QParser and ValueSourceParser + * framework) are appropriately {@link Object#equals} and + * {@link Object#hashCode()} equivalent. 
If you are adding a new default * QParser or ValueSourceParser, you will most likely get a failure from * {@link #testParserCoverage} until you add a new test method to this class. * @@ -89,9 +89,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } public void testQueryLucene() throws Exception { - assertQueryEquals("lucene", "{!lucene}apache solr", + assertQueryEquals("lucene", "{!lucene}apache solr", "apache solr", "apache solr "); - assertQueryEquals("lucene", "+apache +solr", "apache AND solr", + assertQueryEquals("lucene", "+apache +solr", "apache AND solr", " +apache +solr"); } @@ -113,20 +113,20 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testQueryPrefix() throws Exception { SolrQueryRequest req = req("myField","foo_s"); try { - assertQueryEquals("prefix", req, - "{!prefix f=$myField}asdf", + assertQueryEquals("prefix", req, + "{!prefix f=$myField}asdf", "{!prefix f=foo_s}asdf"); } finally { req.close(); } } - + public void testQueryBoost() throws Exception { SolrQueryRequest req = req("df","foo_s","myBoost","sum(3,foo_i)"); try { - assertQueryEquals("boost", req, - "{!boost b=$myBoost}asdf", - "{!boost b=$myBoost v=asdf}", + assertQueryEquals("boost", req, + "{!boost b=$myBoost}asdf", + "{!boost b=$myBoost v=asdf}", "{!boost b=sum(3,foo_i)}foo_s:asdf"); } finally { req.close(); @@ -214,11 +214,11 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } public void testQuerySwitch() throws Exception { - SolrQueryRequest req = req("myXXX", "XXX", + SolrQueryRequest req = req("myXXX", "XXX", "myField", "foo_s", "myQ", "{!prefix f=$myField}asdf"); try { - assertQueryEquals("switch", req, + assertQueryEquals("switch", req, "{!switch case.foo=XXX case.bar=zzz case.yak=qqq}foo", "{!switch case.foo=qqq case.bar=XXX case.yak=zzz} bar ", "{!switch case.foo=qqq case.bar=XXX case.yak=zzz v=' bar '}", @@ -230,7 +230,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "{!switch case=XXX case.bar=zzz case.yak=qqq} ", 
"{!switch case=$myXXX case.bar=zzz case.yak=qqq} "); - assertQueryEquals("switch", req, + assertQueryEquals("switch", req, "{!switch case.foo=$myQ case.bar=zzz case.yak=qqq}foo", "{!query v=$myQ}"); } finally { @@ -250,16 +250,16 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { for (final String type : new String[]{"dismax","edismax"}) { assertQueryEquals(type, "{!"+type+"}apache solr", "apache solr", "apache solr", "apache solr "); - assertQueryEquals(type, "+apache +solr", "apache AND solr", + assertQueryEquals(type, "+apache +solr", "apache AND solr", " +apache +solr"); } } public void testField() throws Exception { SolrQueryRequest req = req("myField","foo_s"); try { - assertQueryEquals("field", req, - "{!field f=$myField}asdf", - "{!field f=$myField v=asdf}", + assertQueryEquals("field", req, + "{!field f=$myField}asdf", + "{!field f=$myField v=asdf}", "{!field f=foo_s}asdf"); } finally { req.close(); @@ -269,9 +269,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testQueryRaw() throws Exception { SolrQueryRequest req = req("myField","foo_s"); try { - assertQueryEquals("raw", req, - "{!raw f=$myField}asdf", - "{!raw f=$myField v=asdf}", + assertQueryEquals("raw", req, + "{!raw f=$myField}asdf", + "{!raw f=$myField v=asdf}", "{!raw f=foo_s}asdf"); } finally { req.close(); @@ -281,9 +281,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testQueryTerm() throws Exception { SolrQueryRequest req = req("myField","foo_s"); try { - assertQueryEquals("term", req, - "{!term f=$myField}asdf", - "{!term f=$myField v=asdf}", + assertQueryEquals("term", req, + "{!term f=$myField}asdf", + "{!term f=$myField v=asdf}", "{!term f=foo_s}asdf"); } finally { req.close(); @@ -304,7 +304,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { assertQueryEquals("collapse", req, "{!collapse field=$myField min=a}", "{!collapse field=$myField min=a nullPolicy=ignore}"); - + assertQueryEquals("collapse", req, "{!collapse 
field=$myField sort=$g_sort}", "{!collapse field=$myField sort='foo_s1 asc, foo_i desc'}", @@ -358,10 +358,10 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testQueryNested() throws Exception { SolrQueryRequest req = req("df", "foo_s"); try { - assertQueryEquals("query", req, - "{!query defType=lucene}asdf", - "{!query v='foo_s:asdf'}", - "{!query}foo_s:asdf", + assertQueryEquals("query", req, + "{!query defType=lucene}asdf", + "{!query v='foo_s:asdf'}", + "{!query}foo_s:asdf", "{!query}asdf"); } finally { req.close(); @@ -374,11 +374,11 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "myField","foo_i", "myInner","product(4,foo_i)"); try { - assertQueryEquals("func", req, + assertQueryEquals("func", req, "{!func}sum(4,5)", "{!func}sum(4,$myVar)", "sum(4,5)"); - assertQueryEquals("func", req, + assertQueryEquals("func", req, "{!func}sum(1,2,3,4,5)", "{!func}sum(1,2,3,4,$myVar)", "sum(1,2,3,4,5)"); @@ -400,7 +400,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "myInner","product(4,foo_i)"); try { // NOTE: unlike most queries, frange defaultsto cost==100 - assertQueryEquals("frange", req, + assertQueryEquals("frange", req, "{!frange l=0.2 h=20.4}sum(4,5)", "{!frange l=0.2 h=20.4 cost=100}sum(4,5)", "{!frange l=$low h=$high}sum(4,$myVar)"); @@ -437,7 +437,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "pt","10.312,-20.556", "sfield","store"); try { - assertQueryEquals(type, req, + assertQueryEquals(type, req, "{!"+type+" d=109}", "{!"+type+" sfield=$sfield}", "{!"+type+" sfield=store d=109}", @@ -445,12 +445,12 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "{!"+type+" sfield=store d=$d pt=10.312,-20.556}", "{!"+type+"}"); // diff SpatialQueryable FieldTypes matter for determining final query - assertQueryEquals(type, req, + assertQueryEquals(type, req, "{!"+type+" sfield=point_hash}", "{!"+type+" sfield=point_hash d=109}", "{!"+type+" sfield=point_hash d=$d pt=$pt}", "{!"+type+" 
sfield=point_hash d=$d pt=10.312,-20.556}"); - assertQueryEquals(type, req, + assertQueryEquals(type, req, "{!"+type+" sfield=point}", "{!"+type+" sfield=point d=109}", "{!"+type+" sfield=point d=$d pt=$pt}", @@ -466,7 +466,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "tt", "bar_s"); try { - assertQueryEquals("join", req, + assertQueryEquals("join", req, "{!join from=foo_s to=bar_s}asdf", "{!join from=$ff to=$tt}asdf", "{!join from=$ff to='bar_s'}text:asdf"); @@ -501,14 +501,14 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "{!parent which=foo_s:parent}dude"); assertQueryEquals("child", "{!child of=foo_s:parent}dude", "{!child of=foo_s:parent}dude"); - // zero query case + // zero query case assertQueryEquals(null, "{!parent which=foo_s:parent}", "{!parent which=foo_s:parent}"); assertQueryEquals(null, "{!child of=foo_s:parent}", "{!child of=foo_s:parent}"); assertQueryEquals(null, "{!parent which='+*:* -foo_s:parent'}", "{!child of=foo_s:parent}"); - + final SolrQueryRequest req = req( "fq","bar_s:baz","fq","{!tag=fqban}bar_s:ban", "ffq","bar_s:baz","ffq","{!tag=ffqban}bar_s:ban"); @@ -521,11 +521,11 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "{!parent which=foo_s:parent param=$fq excludeTags=fqban}foo_s:bar", "{!parent which=foo_s:parent param=$ffq excludeTags=ffqban}foo_s:bar" // differently named params ); - + QueryUtils.checkUnequal(// parent filter is not an equal to child QParser.getParser("{!child of=foo_s:parent}", req).getQuery(), QParser.getParser("{!parent which=foo_s:parent}", req).getQuery()); - + } finally { req.close(); } @@ -543,15 +543,15 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { ); assertQueryEquals("filters", req, "{!filters param=$fq excludeTags=fqban}foo_s:bar", - "{!filters param=$ffq excludeTags=ffqban}foo_s:bar" + "{!filters param=$ffq excludeTags=ffqban}foo_s:bar" ); assertQueryEquals("filters", req, "{!filters excludeTags=top}{!tag=top v='foo_s:bar'}", - "{!filters param=$ffq 
excludeTags='ffqban,ffqbaz'}" + "{!filters param=$ffq excludeTags='ffqban,ffqbaz'}" ); QueryUtils.checkUnequal( QParser.getParser("{!filters param=$fq}foo_s:bar", req).getQuery(), - QParser.getParser("{!filters param=$fq excludeTags=fqban}foo_s:bar", req).getQuery()); + QParser.getParser("{!filters param=$fq excludeTags=fqban}foo_s:bar", req).getQuery()); } finally { req.close(); } @@ -568,34 +568,34 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "useAutn","false" ); // make sure all param subsitution works for all args to graph query. - assertQueryEquals("graph", req, + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s}*:*", "{!graph from=$from to=$to}*:*"); - + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s traversalFilter=foo}*:*", "{!graph from=$from to=$to traversalFilter=$traversalFilter}*:*"); - + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true}*:*", "{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf}*:*"); - + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false}*:*", "{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot}*:*"); - + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false maxDepth=2}*:*", "{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot maxDepth=$maxDepth}*:*"); - + assertQueryEquals("graph", req, "{!graph from=node_s to=edge_s traversalFilter=foo returnOnlyLeaf=true returnRoot=false maxDepth=2 useAutn=false}*:*", "{!graph from=$from to=$to traversalFilter=$traversalFilter returnOnlyLeaf=$returnOnlyLeaf returnRoot=$returnRoot maxDepth=$maxDepth useAutn=$useAutn}*:*"); - + } public void testQuerySurround() throws Exception { - assertQueryEquals("surround", 
"{!surround}and(apache,solr)", + assertQueryEquals("surround", "{!surround}and(apache,solr)", "and(apache,solr)", "apache AND solr"); } @@ -607,27 +607,27 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } public void testFuncTestfunc() throws Exception { - assertFuncEquals("testfunc(foo_i)","testfunc(field(foo_i))"); - assertFuncEquals("testfunc(23)"); + assertFuncEquals("testfunc(foo_i)","testfunc(field(foo_i))"); + assertFuncEquals("testfunc(23)"); assertFuncEquals("testfunc(sum(23,foo_i))", - "testfunc(sum(23,field(foo_i)))"); + "testfunc(sum(23,field(foo_i)))"); } public void testFuncOrd() throws Exception { - assertFuncEquals("ord(foo_s)","ord(foo_s )"); + assertFuncEquals("ord(foo_s)","ord(foo_s )"); } public void testFuncLiteral() throws Exception { SolrQueryRequest req = req("someVar","a string"); try { - assertFuncEquals(req, + assertFuncEquals(req, "literal('a string')","literal(\"a string\")", - "literal($someVar)"); + "literal($someVar)"); } finally { req.close(); } } public void testFuncRord() throws Exception { - assertFuncEquals("rord(foo_s)","rord(foo_s )"); + assertFuncEquals("rord(foo_s)","rord(foo_s )"); } public void testFuncCscore() throws Exception { @@ -640,7 +640,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncLinear() throws Exception { SolrQueryRequest req = req("someVar","27"); try { - assertFuncEquals(req, + assertFuncEquals(req, "linear(foo_i,$someVar,42)", "linear(foo_i, 27, 42)"); } finally { @@ -650,7 +650,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncRecip() throws Exception { SolrQueryRequest req = req("someVar","27"); try { - assertFuncEquals(req, + assertFuncEquals(req, "recip(foo_i,$someVar,42, 27 )", "recip(foo_i, 27, 42,$someVar)"); } finally { @@ -660,7 +660,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncScale() throws Exception { SolrQueryRequest req = req("someVar","27"); try { - assertFuncEquals(req, + 
assertFuncEquals(req, "scale(field(foo_i),$someVar,42)", "scale(foo_i, 27, 42)"); } finally { @@ -669,17 +669,17 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } public void testFuncDiv() throws Exception { assertFuncEquals("div(5,4)", "div(5, 4)"); - assertFuncEquals("div(foo_i,4)", "div(foo_i, 4)", + assertFuncEquals("div(foo_i,4)", "div(foo_i, 4)", "div(field('foo_i'), 4)"); - assertFuncEquals("div(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("div(foo_i,sub(4,field('bar_i')))", "div(field(foo_i), sub(4,bar_i))"); } public void testFuncMod() throws Exception { assertFuncEquals("mod(5,4)", "mod(5, 4)"); - assertFuncEquals("mod(foo_i,4)", "mod(foo_i, 4)", + assertFuncEquals("mod(foo_i,4)", "mod(foo_i, 4)", "mod(field('foo_i'), 4)"); - assertFuncEquals("mod(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("mod(foo_i,sub(4,field('bar_i')))", "mod(field(foo_i), sub(4,bar_i))"); } public void testFuncMap() throws Exception { @@ -690,9 +690,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncSum() throws Exception { assertFuncEquals("sum(5,4)", "add(5, 4)"); assertFuncEquals("sum(5,4,3,2,1)", "add(5, 4, 3, 2, 1)"); - assertFuncEquals("sum(foo_i,4)", "sum(foo_i, 4)", + assertFuncEquals("sum(foo_i,4)", "sum(foo_i, 4)", "sum(field('foo_i'), 4)"); - assertFuncEquals("add(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("add(foo_i,sub(4,field('bar_i')))", "sum(field(foo_i), sub(4,bar_i))"); } @@ -700,9 +700,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncProduct() throws Exception { assertFuncEquals("product(5,4,3,2,1)", "mul(5, 4, 3, 2, 1)"); assertFuncEquals("product(5,4)", "mul(5, 4)"); - assertFuncEquals("product(foo_i,4)", "product(foo_i, 4)", + assertFuncEquals("product(foo_i,4)", "product(foo_i, 4)", "product(field('foo_i'), 4)"); - assertFuncEquals("mul(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("mul(foo_i,sub(4,field('bar_i')))", "product(field(foo_i), sub(4,bar_i))"); } @@ 
-745,7 +745,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { SolrQueryRequest req = req("pt","10.312,-20.556", "sfield","store"); try { - assertFuncEquals(req, + assertFuncEquals(req, "geodist()", "geodist($sfield,$pt)", "geodist(store,$pt)", @@ -777,13 +777,13 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncMin() throws Exception { assertFuncEquals("min(5,4,3,2,1)", "min(5, 4, 3, 2, 1)"); assertFuncEquals("min(foo_i,4)", "min(field('foo_i'), 4)"); - assertFuncEquals("min(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("min(foo_i,sub(4,field('bar_i')))", "min(field(foo_i), sub(4,bar_i))"); } public void testFuncMax() throws Exception { assertFuncEquals("max(5,4,3,2,1)", "max(5, 4, 3, 2, 1)"); assertFuncEquals("max(foo_i,4)", "max(field('foo_i'), 4)"); - assertFuncEquals("max(foo_i,sub(4,field('bar_i')))", + assertFuncEquals("max(foo_i,sub(4,field('bar_i')))", "max(field(foo_i), sub(4,bar_i))"); } @@ -819,8 +819,8 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } // ttf is an alias for totaltermfreq - assertFuncEquals(req, - "ttf(field_t,'my term')", "ttf('field_t','my term')", + assertFuncEquals(req, + "ttf(field_t,'my term')", "ttf('field_t','my term')", "totaltermfreq(field_t,'my term')"); } finally { @@ -856,12 +856,12 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncExists() throws Exception { SolrQueryRequest req = req("myField","field_t","myQ","asdf"); try { - assertFuncEquals(req, + assertFuncEquals(req, "exists(field_t)", "exists($myField)", "exists(field('field_t'))", "exists(field($myField))"); - assertFuncEquals(req, + assertFuncEquals(req, "exists(query($myQ))", "exists(query({!lucene v=$myQ}))"); } finally { @@ -872,19 +872,19 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { public void testFuncNot() throws Exception { SolrQueryRequest req = req("myField","field_b", "myTrue","true"); try { - assertFuncEquals(req, "not(true)", "not($myTrue)"); - 
assertFuncEquals(req, "not(not(true))", "not(not($myTrue))"); - assertFuncEquals(req, + assertFuncEquals(req, "not(true)", "not($myTrue)"); + assertFuncEquals(req, "not(not(true))", "not(not($myTrue))"); + assertFuncEquals(req, "not(field_b)", "not($myField)", "not(field('field_b'))", "not(field($myField))"); - assertFuncEquals(req, + assertFuncEquals(req, "not(exists(field_b))", "not(exists($myField))", "not(exists(field('field_b')))", "not(exists(field($myField)))"); - + } finally { req.close(); } @@ -910,12 +910,12 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "myIntField","bar_i", "myTrue","true"); try { - assertFuncEquals(req, + assertFuncEquals(req, "if(foo_b,bar_i,25)", "if($myBoolField,bar_i,25)", "if(field('foo_b'),$myIntField,25)", "if(field($myBoolField),field('bar_i'),25)"); - assertFuncEquals(req, + assertFuncEquals(req, "if(true,37,field($myIntField))", "if($myTrue,37,$myIntField)"); } finally { @@ -927,11 +927,11 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { SolrQueryRequest req = req("myField","bar_f"); try { - assertFuncEquals(req, + assertFuncEquals(req, "def(bar_f,25)", "def($myField,25)", "def(field('bar_f'),25)"); - assertFuncEquals(req, + assertFuncEquals(req, "def(ceil(bar_f),25)", "def(ceil($myField),25)", "def(ceil(field('bar_f')),25)"); @@ -964,7 +964,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "ceil","floor","rint"}) { try { assertFuncEquals(req, - func + "(field(foo_i))", func + "(foo_i)", + func + "(field(foo_i))", func + "(foo_i)", func + "($myField)"); assertFuncEquals(req, func + "(45)", func+ "($myVal)"); } finally { @@ -979,12 +979,12 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { for (final String func : new String[] {"pow","hypot","atan2"}) { try { assertFuncEquals(req, - func + "(field(foo_i),$myVal)", func+"(foo_i,$myVal)", + func + "(field(foo_i),$myVal)", func+"(foo_i,$myVal)", func + "($myField,45)"); - assertFuncEquals(req, + assertFuncEquals(req, 
func+"(45,$myOtherVal)", func+"($myVal,27)", func+"($myVal,$myOtherVal)"); - + } finally { req.close(); } @@ -996,31 +996,31 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "myField","foo_s1"); try { assertFuncEquals(req, - "strdist(\"zot\",literal('yak'),edit)", - "strdist(literal(\"zot\"),'yak', edit )", + "strdist(\"zot\",literal('yak'),edit)", + "strdist(literal(\"zot\"),'yak', edit )", "strdist(literal($myVal),literal($myOtherVal),edit)"); assertFuncEquals(req, - "strdist(\"zot\",literal($myOtherVal),ngram)", + "strdist(\"zot\",literal($myOtherVal),ngram)", "strdist(\"zot\",'yak', ngram, 2)"); assertFuncEquals(req, - "strdist(field('foo_s1'),literal($myOtherVal),jw)", - "strdist(field($myField),\"yak\",jw)", + "strdist(field('foo_s1'),literal($myOtherVal),jw)", + "strdist(field($myField),\"yak\",jw)", "strdist($myField,'yak', jw)"); } finally { req.close(); } } public void testFuncField() throws Exception { - assertFuncEquals("field(\"foo_i\")", - "field('foo_i\')", + assertFuncEquals("field(\"foo_i\")", + "field('foo_i\')", "foo_i"); - + // simple VS of single valued field should be same as asking for min/max on that field - assertFuncEquals("field(\"foo_i\")", - "field('foo_i',min)", - "field(foo_i,'min')", - "field('foo_i',max)", - "field(foo_i,'max')", + assertFuncEquals("field(\"foo_i\")", + "field('foo_i',min)", + "field(foo_i,'min')", + "field('foo_i',max)", + "field(foo_i,'max')", "foo_i"); // multivalued field with selector @@ -1030,15 +1030,15 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { assertTrue(multif + " is no longer multivalued, who broke this schema?", req.getSchema().getField(multif).multiValued()); assertFuncEquals(req, - "field($my_field,'MIN')", + "field($my_field,'MIN')", "field('"+multif+"',min)"); assertFuncEquals(req, - "field($my_field,'max')", - "field('"+multif+"',Max)"); - + "field($my_field,'max')", + "field('"+multif+"',Max)"); + } public void testFuncCurrency() throws Exception { - 
assertFuncEquals("currency(\"amount\")", + assertFuncEquals("currency(\"amount\")", "currency('amount\')", "currency(amount)", "currency(amount,USD)", @@ -1048,7 +1048,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { SolrQueryRequest req = req("fore","foo_s:front", "back","foo_s:back"); try { assertFuncEquals(req, - "agg_relatedness({!query v='foo_s:front'}, {!query v='foo_s:back'})", + "agg_relatedness({!query v='foo_s:front'}, {!query v='foo_s:back'})", "agg_relatedness($fore, $back)"); } finally { req.close(); @@ -1059,7 +1059,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { assertFuncEquals("sleep(1,5)", "sleep(1,5)"); assertFuncEquals("threadid()", "threadid()"); } - + // TODO: more tests public void testQueryMaxScore() throws Exception { assertQueryEquals("maxscore", "{!maxscore}A OR B OR C", @@ -1073,9 +1073,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } /** - * this test does not assert anything itself, it simply toggles a static - * boolean informing an @AfterClass method to assert that every default - * qparser and valuesource parser configured was recorded by + * this test does not assert anything itself, it simply toggles a static + * boolean informing an @AfterClass method to assert that every default + * qparser and valuesource parser configured was recorded by * assertQueryEquals and assertFuncEquals. 
*/ public void testParserCoverage() { @@ -1108,7 +1108,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { /** - * NOTE: defType is not only used to pick the parser, but also to record + * NOTE: defType is not only used to pick the parser, but also to record * the parser being tested for coverage sanity checking * @see #testParserCoverage * @see #assertQueryEquals @@ -1124,7 +1124,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } /** - * NOTE: defType is not only used to pick the parser, but, if non-null it is + * NOTE: defType is not only used to pick the parser, but, if non-null it is * also to record the parser being tested for coverage sanity checking * * @see QueryUtils#check @@ -1151,8 +1151,8 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { for (int i = 0; i < queries.length; i++) { QueryUtils.check(queries[i]); - // yes starting j=0 is redundent, we're making sure every query - // is equal to itself, and that the quality checks work regardless + // yes starting j=0 is redundent, we're making sure every query + // is equal to itself, and that the quality checks work regardless // of which caller/callee is used. for (int j = 0; j < queries.length; j++) { QueryUtils.checkEqual(queries[i], queries[j]); @@ -1291,6 +1291,18 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { ); } + public void testXCJFQuery() throws Exception { + assertQueryEquals("xcjf", + "{!xcjf collection=abc from=x_id to=x_id}*:*", + "{!xcjf collection=abc from=x_id to=x_id v='*:*'}"); + } + + public void testHashRangeQuery() throws Exception { + assertQueryEquals("hash_range", + "{!hash_range f=x_id l=107347968 u=214695935}", + "{!hash_range l='107347968' u='214695935' f='x_id'}"); + } + // Override req to add df param public static SolrQueryRequest req(String... 
q) { return SolrTestCaseJ4.req(q, "df", "text"); diff --git a/solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java new file mode 100644 index 00000000000..c58ccb56edd --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/join/XCJFQueryTest.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.search.join; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.junit.BeforeClass; +import org.junit.Test; + +public class XCJFQueryTest extends SolrCloudTestCase { + + private static final int NUM_NODES = 3; + private static final int NUM_SHARDS = 3; + private static final int NUM_REPLICAS = 1; + + private static final int NUM_PRODUCTS = 200; + private static final String[] SIZES = new String[]{"S", "M", "L", "XL"}; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(NUM_NODES) + .addConfig("xcjf", configset("xcjf")) + .withSolrXml(TEST_PATH().resolve("solr.xml")) + .configure(); + + + CollectionAdminRequest.createCollection("products", "xcjf", NUM_SHARDS, NUM_REPLICAS) + .process(cluster.getSolrClient()); + + CollectionAdminRequest.createCollection("parts", "xcjf", NUM_SHARDS, NUM_REPLICAS) + .process(cluster.getSolrClient()); + + } + + public static void setupIndexes(boolean routeByKey) throws IOException, SolrServerException { + clearCollection("products"); + clearCollection("parts"); + + buildIndexes(routeByKey); + + assertResultCount("products", "*:*", NUM_PRODUCTS, true); + assertResultCount("parts", "*:*", NUM_PRODUCTS * 10 / 4, true); + } + + private static void clearCollection(String collection) 
throws IOException, SolrServerException { + UpdateRequest update = new UpdateRequest(); + update.deleteByQuery("*:*"); + update.process(cluster.getSolrClient(), collection); + } + + private static void buildIndexes(boolean routeByKey) throws IOException, SolrServerException { + List productDocs = new ArrayList<>(); + List partDocs = new ArrayList<>(); + + for (int productId = 0; productId < NUM_PRODUCTS; ++productId) { + int sizeNum = productId % SIZES.length; + String size = SIZES[sizeNum]; + + productDocs.add(new SolrInputDocument( + "id", buildId(productId, String.valueOf(productId), routeByKey), + "product_id_i", String.valueOf(productId), + "product_id_l", String.valueOf(productId), + "product_id_s", String.valueOf(productId), + "size_s", size)); + + // Index 1 parts document for each small product, 2 for each medium, 3 for each large, etc. + for (int partNum = 0; partNum <= sizeNum; partNum++) { + String partId = String.format(Locale.ROOT, "%d_%d", productId, partNum); + partDocs.add(new SolrInputDocument( + "id", buildId(productId, partId, routeByKey), + "product_id_i", String.valueOf(productId), + "product_id_l", String.valueOf(productId), + "product_id_s", String.valueOf(productId))); + } + } + + indexDocs("products", productDocs); + cluster.getSolrClient().commit("products"); + + indexDocs("parts", partDocs); + cluster.getSolrClient().commit("parts"); + } + + private static String buildId(int productId, String id, boolean routeByKey) { + return routeByKey ? productId + "!" 
+ id : id; + } + + private static void indexDocs(String collection, Collection docs) throws IOException, SolrServerException { + UpdateRequest update = new UpdateRequest(); + update.add(docs); + update.process(cluster.getSolrClient(), collection); + } + + private String getSolrUrl() { + List runners = cluster.getJettySolrRunners(); + JettySolrRunner runner = runners.get(random().nextInt(runners.size())); + return runner.getBaseUrl().toString(); + } + + @Test + public void testXcjfRoutedCollection() throws Exception { + setupIndexes(true); + testXcjfQuery("{!xcjf collection=products from=product_id_i to=product_id_i}size_s:M",true); + int i = 0; + for (JettySolrRunner runner : cluster.getJettySolrRunners()) { + i++; + String url = runner.getBaseUrl().toString(); + System.setProperty("test.xcjf.solr.url." + i, url); + } + try { + // now we need to re-upload our config , now that we know a valid solr url for the cluster. + CloudSolrClient client = cluster.getSolrClient(); + ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("xcjf"), "xcjf"); + // reload the cores with the updated whitelisted solr url config. 
+ CollectionAdminRequest.Reload.reloadCollection("products").process(client); + CollectionAdminRequest.Reload.reloadCollection("parts").process(client); + Thread.sleep(10000); + + testXcjfQuery("{!xcjf collection=products from=product_id_i to=product_id_i}size_s:M",true); + + testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", getSolrUrl()), + true); + + testXcjfQuery("{!xcjf collection=products from=product_id_l to=product_id_l}size_s:M", + true); + testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf solrUrl=\"%s\" collection=products from=product_id_l to=product_id_l}size_s:M", + getSolrUrl()), + true); + + testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s}size_s:M", + true); + testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf solrUrl=\"%s\" collection=products from=product_id_s to=product_id_s}size_s:M", + getSolrUrl()), + true); + testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf zkHost=\"%s\" collection=products from=product_id_s to=product_id_s}size_s:M", + cluster.getSolrClient().getZkHost()), + true); + + // Test the ability to set other parameters on xcjf and have them passed through + assertResultCount("parts", + "{!xcjf collection=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", + 2, true); + assertResultCount("parts", + String.format(Locale.ROOT, + "{!xcjf solrUrl=\"%s\" collection=products from=product_id_s to=product_id_s fq=product_id_s:1}size_s:M", + getSolrUrl()), 2, true); + } finally { + for (JettySolrRunner runner : cluster.getJettySolrRunners()) { + i++; + System.getProperties().remove("test.xcjf.solr.url." + i); + } + } + } + + @Test + public void testXcjfNonroutedCollection() throws Exception { + setupIndexes(false); + + // This query will expect the collection to have been routed on product_id, so it should return + // incomplete results. 
+ testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s}size_s:M", + false); + // Now if we set routed=false we should get a complete set of results. + testXcjfQuery("{!xcjf collection=products from=product_id_s to=product_id_s routed=false}size_s:M", + true); + // The xcjf_nonrouted query parser doesn't assume that the collection was routed on product_id, + // so we should get the full set of results. + testXcjfQuery("{!xcjf_nonrouted collection=products from=product_id_s to=product_id_s}size_s:M", + true); + // But if we set routed=true, we are now assuming again that the collection was routed on product_id, + // so we should get incomplete results. + testXcjfQuery("{!xcjf_nonrouted collection=products from=product_id_s to=product_id_s routed=true}size_s:M", + false); + } + + @Test + public void testSolrUrlWhitelist() throws Exception { + setupIndexes(false); + + // programmatically add the current jetty solr url to the solrUrl whitelist property in the solrconfig.xml + int i = 0; + for (JettySolrRunner runner : cluster.getJettySolrRunners()) { + i++; + System.setProperty("test.xcjf.solr.url." + i, runner.getBaseUrl().toString()); + } + try { + // now we need to re-upload our config , now that we know a valid solr url for the cluster. + CloudSolrClient client = cluster.getSolrClient(); + ((ZkClientClusterStateProvider) client.getClusterStateProvider()).uploadConfig(configset("xcjf"), "xcjf"); + // reload the cores with the updated whitelisted solr url config. + CollectionAdminRequest.Reload.reloadCollection("products").process(client); + CollectionAdminRequest.Reload.reloadCollection("parts").process(client); + + final ModifiableSolrParams params = new ModifiableSolrParams(); + // a bogus solrUrl + params.add("q", ""); + params.add("rows", "0"); + + // we expect an exception because bogus url isn't valid. + try { + // This should throw an exception. + // verify the xcfj_whitelist definition has the current valid urls and works. 
+ testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf_whitelist solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", + "http://bogus.example.com:8983/solr"), + true); + fail("The query invovling bogus.example.com should not succeed"); + } catch (Exception e) { + // should get here. + String message = e.getMessage(); + assertTrue("message was " + message, message.contains("SyntaxError: Solr Url was not in the whitelist")); + } + + // verify the xcfj_whitelist definition has the current valid urls and works. + testXcjfQuery(String.format(Locale.ROOT, + "{!xcjf_whitelist solrUrl=\"%s\" collection=products from=product_id_i to=product_id_i}size_s:M", + getSolrUrl()), + true); + + } finally { + for (JettySolrRunner runner : cluster.getJettySolrRunners()) { + i++; + System.getProperties().remove("test.xcjf.solr.url." + i); + } + } + } + + public void testXcjfQuery(String query, boolean expectFullResults) throws Exception { + assertResultCount("parts", query, NUM_PRODUCTS / 2, expectFullResults); + } + + private static void assertResultCount(String collection, String query, long expectedCount, boolean expectFullResults) + throws IOException, SolrServerException { + + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", query); + params.add("rows", "0"); + + QueryResponse resp = cluster.getSolrClient().query(collection, params); + + if (expectFullResults) { + assertEquals(expectedCount, resp.getResults().getNumFound()); + } else { + assertTrue(resp.getResults().getNumFound() < expectedCount); + } + } +} diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index 4c4f3cce021..fccc7a41fde 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -549,6 +549,45 @@ http://localhost:8983/solr/alt_graph/query?fl=id&q={!graph+from=id+to=out_edge+m { "id":"H" } ] } ---- +== Hash Range Query Parser + +The hash range query parser 
will return documents that have a field that contains a value that would be hashed to a particular range. This is used by the XCJF query parser. This query parser has a per-segment cache for each field that this query parser will operate on.
+
+When specifying a min/max hash range and a field name with the hash range query parser, only documents that contain a field value that hashes into that range will be returned. If you want to query for a very large result set, you can query for various hash ranges to return a fraction of the documents with each range request. In the XCJF case, the hash_range query parser is used to ensure that each shard only gets the set of join keys that would end up on that shard.
+
+This query parser uses the MurmurHash3_x86_32 hash function. This is the same as the default hashing for the default composite ID router in Solr.
+
+=== Hash Range Parameters
+
+`f`::
+The field name to operate on. This field should have docValues enabled and should be single-valued.
+
+`l`::
+The lower bound of the hash range for the query.
+
+`u`::
+The upper bound of the hash range for the query.
+
+=== Hash Range Example
+
+[source,text]
+----
+{!hash_range f="field_name" l="0" u="12345"}
+----
+
+=== Hash Range Cache Config
+
+The hash range query parser uses a special cache to improve the speed of the queries. The following should be added to the solrconfig.xml for the various fields that you want to perform the hash range query on. Note the name of the cache should be the field name prefixed by "hash_".
+
+[source,xml]
+----
+<cache name="hash_field_name"
+       class="solr.LRUCache"
+       size="128"
+       initialSize="0"
+       regenerator="solr.NoOpRegenerator"/>
+----
+
 == Join Query Parser
@@ -1018,6 +1057,71 @@ An optional parameter used to determine which of several query implementations s
 {!terms f=categoryId method=booleanQuery separator=" "}8 6 7 5309
 ----
+== XCJF Query Parser
+The Cross Collection Join filter is a query parser plugin that will execute a query against a remote Solr collection to get back a set of join keys that will be used as a filter query against the local Solr collection.
The XCJF query parser will create an XCJFQuery object. The XCJFQuery will first query a remote Solr collection and get back a streaming expression result of the join keys. As the join keys are streamed to the node, a bitset of the matching documents in the local index is built up. This avoids keeping the full set of join keys in memory at any given time. This bitset is then inserted into the filter cache upon successful execution, as with the normal behavior of the Solr filter cache.
+
+If the local index is sharded according to the join key field, the XCJF query can leverage a secondary query parser called the "hash_range" query parser. The hash_range query parser is responsible for returning only the documents that hash to a given range of values. This allows the XCJFQuery to query the remote Solr collection and return only the join keys that would match a specific shard in the local Solr collection. This has the benefit of making sure that network traffic doesn't increase as the number of shards increases and allows for much greater scalability.
+
+The XCJF parser works with both String and Point types of fields. The fields that are being used for the join key must be single-valued and have docValues enabled. It's advised to shard the local collection by the join key as this allows for the optimization mentioned above to be utilized. The XCJF query should generally not be used as part of the "q" parameter; rather, it is designed to be used as a filter query ("fq") parameter to ensure proper caching. The remote Solr collection that is being queried should have a single-valued field for the join key with docValues enabled. The remote Solr collection does not have any specific sharding requirements.
+
+=== XCJF Query Parser definition in solrconfig.xml
+
+The XCJF has some configuration options that can be specified in the solrconfig.xml.
+
+`routerField`::
+If the documents are routed to shards using the CompositeID router by the join field, then that field name should be specified in the configuration here. This will allow the parser to optimize the resulting HashRange query.
+
+`solrUrl`::
+If specified, this array of strings specifies the whitelisted Solr URLs that you can pass to the solrUrl query parameter. Without this configuration the solrUrl parameter cannot be used. This restriction is necessary to prevent an attacker from using Solr to explore the network.
+
+[source,xml]
+----
+<queryParser name="xcjf" class="org.apache.solr.search.join.XCJFQParserPlugin">
+  <str name="routerField">joinfield_id_s</str>
+  <arr name="solrUrl">
+    <str>http://othersolr.example.com:8983/solr</str>
+  </arr>
+</queryParser>
+----
+
+=== XCJF Query Parameters
+
+`collection`::
+The name of the external Solr collection to be queried to retrieve the set of join key values ( required )
+
+`zkHost`::
+The connection string to be used to connect to Zookeeper. zkHost and solrUrl are both optional parameters, and at most one of them should be specified. If neither zkHost nor solrUrl is specified, the local Zookeeper cluster will be used. ( optional )
+
+`solrUrl`::
+The URL of the external Solr node to be queried. Must be a character-for-character exact match of a whitelisted URL. ( optional, disabled by default for security )
+
+`from`::
+The join key field name in the external collection ( required )
+
+`to`::
+The join key field name in the local collection
+
+`v`::
+The query substituted in as a local param. This is the query string that will match documents in the remote collection.
+
+`routed`::
+true / false. If true, the XCJF query will use each shard's hash range to determine the set of join keys to retrieve for that shard. This parameter improves the performance of the cross-collection join, but it depends on the local collection being routed by the toField.
If this parameter is not specified, the XCJF query will try to determine the correct value automatically.
+
+`ttl`::
+The length of time that an XCJF query in the cache will be considered valid, in seconds. Defaults to 3600 (one hour). The XCJF query will not be aware of changes to the remote collection, so if the remote collection is updated, cached XCJF queries may give inaccurate results. After the ttl period has expired, the XCJF query will re-execute the join against the remote collection.
+
+`All others`::
+Any normal Solr parameter can also be specified/passed through as a local param.
+
+=== XCJF Query Examples
+
+[source,text]
+----
+http://localhost:8983/solr/localCollection/query?fl=id&q={!xcjf collection="otherCollection" from="fromField" to="toField" v="*:*"}
+----
+
 == XML Query Parser
 The {solr-javadocs}/solr-core/org/apache/solr/search/XmlQParserPlugin.html[XmlQParserPlugin] extends the {solr-javadocs}/solr-core/org/apache/solr/search/QParserPlugin.html[QParserPlugin] and supports the creation of queries from XML. Example: