SOLR-10623: Add sql Streaming Expression

This commit is contained in:
Joel Bernstein 2017-05-18 12:38:22 -04:00
parent 5a50887a4b
commit f326cfb175
3 changed files with 321 additions and 0 deletions

View File

@ -167,6 +167,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("get", GetStream.class)
.withFunctionName("timeseries", TimeSeriesStream.class)
.withFunctionName("tuple", TupStream.class)
.withFunctionName("sql", SqlStream.class)
.withFunctionName("col", ColumnEvaluator.class)
.withFunctionName("predict", PredictEvaluator.class)
.withFunctionName("regress", RegressionEvaluator.class)

View File

@ -0,0 +1,280 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.stream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.common.cloud.Aliases;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.StrUtils;
public class SqlStream extends TupleStream implements Expressible {
private static final long serialVersionUID = 1;
protected String zkHost;
protected String collection;
protected SolrParams params;
protected transient CloudSolrClient cloudSolrClient;
protected transient TupleStream tupleStream;
protected transient StreamContext streamContext;
/**
* @param zkHost Zookeeper ensemble connection string
* @param collectionName Name of the collection to operate on
* @param params Map<String, String> of parameter/value pairs
* @throws IOException Something went wrong
* <p>
* This form does not allow specifying multiple clauses, say "fq" clauses, use the form that
* takes a SolrParams. Transition code can call the preferred method that takes SolrParams
* by calling CloudSolrStream(zkHost, collectionName,
* new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(Map&lt;String, String&gt;)));
* @deprecated Use the constructor that has a SolrParams obj rather than a Map
*/
public SqlStream(String zkHost, String collectionName, SolrParams params) throws IOException {
init(collectionName, zkHost, params);
}
public SqlStream(StreamExpression expression, StreamFactory factory) throws IOException{
// grab all parameters out
String collectionName = factory.getValueOperand(expression, 0);
List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost");
// Collection Name
if(null == collectionName){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
}
// Validate there are no unknown parameters - zkHost and alias are namedParameter so we don't need to count it twice
if(expression.getParameters().size() != 1 + namedParams.size()){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression));
}
// Named parameters - passed directly to solr as solrparams
if(0 == namedParams.size()){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression));
}
ModifiableSolrParams mParams = new ModifiableSolrParams();
for(StreamExpressionNamedParameter namedParam : namedParams){
if(!namedParam.getName().equals("zkHost") && !namedParam.getName().equals("aliases")){
mParams.add(namedParam.getName(), namedParam.getParameter().toString().trim());
}
}
// zkHost, optional - if not provided then will look into factory list to get
String zkHost = null;
if(null == zkHostExpression){
zkHost = factory.getCollectionZkHost(collectionName);
if(zkHost == null) {
zkHost = factory.getDefaultZkHost();
}
}
else if(zkHostExpression.getParameter() instanceof StreamExpressionValue){
zkHost = ((StreamExpressionValue)zkHostExpression.getParameter()).getValue();
}
/*
if(null == zkHost){
throw new IOException(String.format(Locale.ROOT,"invalid expression %s - zkHost not found for collection '%s'",expression,collectionName));
}
*/
// We've got all the required items
init(collectionName, zkHost, mParams);
}
@Override
public StreamExpression toExpression(StreamFactory factory) throws IOException {
// functionName(collectionName, param1, param2, ..., paramN, sort="comp", [aliases="field=alias,..."])
// function name
StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
// collection
expression.addParameter(collection);
// parameters
ModifiableSolrParams mParams = new ModifiableSolrParams(SolrParams.toMultiMap(params.toNamedList()));
for (Entry<String, String[]> param : mParams.getMap().entrySet()) {
String value = String.join(",", param.getValue());
// SOLR-8409: This is a special case where the params contain a " character
// Do note that in any other BASE streams with parameters where a " might come into play
// that this same replacement needs to take place.
value = value.replace("\"", "\\\"");
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), value));
}
// zkHost
expression.addParameter(new StreamExpressionNamedParameter("zkHost", zkHost));
return expression;
}
@Override
public Explanation toExplanation(StreamFactory factory) throws IOException {
StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString());
explanation.setFunctionName(factory.getFunctionName(this.getClass()));
explanation.setImplementingClass(this.getClass().getName());
explanation.setExpressionType(ExpressionType.STREAM_SOURCE);
explanation.setExpression(toExpression(factory).toString());
// child is a datastore so add it at this point
StreamExplanation child = new StreamExplanation(getStreamNodeId() + "-datastore");
child.setFunctionName(String.format(Locale.ROOT, "solr (%s)", collection));
child.setImplementingClass("Solr/Lucene");
child.setExpressionType(ExpressionType.DATASTORE);
if(null != params){
ModifiableSolrParams mParams = new ModifiableSolrParams(params);
child.setExpression(mParams.getMap().entrySet().stream().map(e -> String.format(Locale.ROOT, "%s=%s", e.getKey(), e.getValue())).collect(Collectors.joining(",")));
}
explanation.addChild(child);
return explanation;
}
protected void init(String collectionName, String zkHost, SolrParams params) throws IOException {
this.zkHost = zkHost;
this.collection = collectionName;
this.params = new ModifiableSolrParams(params);
// If the comparator is null then it was not explicitly set so we will create one using the sort parameter
// of the query. While doing this we will also take into account any aliases such that if we are sorting on
// fieldA but fieldA is aliased to alias.fieldA then the comparater will be against alias.fieldA.
if (params.get("stmt") == null) {
throw new IOException("stmt param expected for search function");
}
}
public void setStreamContext(StreamContext context) {
this.streamContext = context;
}
/**
* Opens the CloudSolrStream
*
***/
public void open() throws IOException {
constructStream();
tupleStream.open();
}
public List<TupleStream> children() {
return null;
}
public static Collection<Slice> getSlices(String collectionName, ZkStateReader zkStateReader, boolean checkAlias) throws IOException {
ClusterState clusterState = zkStateReader.getClusterState();
Map<String, DocCollection> collectionsMap = clusterState.getCollectionsMap();
// Check collection case sensitive
if(collectionsMap.containsKey(collectionName)) {
return collectionsMap.get(collectionName).getActiveSlices();
}
// Check collection case insensitive
for(String collectionMapKey : collectionsMap.keySet()) {
if(collectionMapKey.equalsIgnoreCase(collectionName)) {
return collectionsMap.get(collectionMapKey).getActiveSlices();
}
}
if(checkAlias) {
// check for collection alias
Aliases aliases = zkStateReader.getAliases();
String alias = aliases.getCollectionAlias(collectionName);
if (alias != null) {
Collection<Slice> slices = new ArrayList<>();
List<String> aliasList = StrUtils.splitSmart(alias, ",", true);
for (String aliasCollectionName : aliasList) {
// Add all active slices for this alias collection
slices.addAll(collectionsMap.get(aliasCollectionName).getActiveSlices());
}
return slices;
}
}
throw new IOException("Slices not found for " + collectionName);
}
protected void constructStream() throws IOException {
try {
List<String> shardUrls = getShards(this.zkHost, this.collection, this.streamContext);
Collections.shuffle(shardUrls);
String url = shardUrls.get(0);
ModifiableSolrParams mParams = new ModifiableSolrParams(params);
mParams.add(CommonParams.QT, "/sql");
this.tupleStream = new SolrStream(url, mParams);
if(streamContext != null) {
tupleStream.setStreamContext(streamContext);
}
} catch (Exception e) {
throw new IOException(e);
}
}
public void close() throws IOException {
tupleStream.close();
}
/** Return the stream sort - ie, the order in which records are returned */
public StreamComparator getStreamSort(){
return null;
}
public Tuple read() throws IOException {
return tupleStream.read();
}
}

View File

@ -242,6 +242,46 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
}
@Test
public void testSqlStream() throws Exception {
new UpdateRequest()
.add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
.add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
.add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
.add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
.add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
List<Tuple> tuples;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext);
try {
StringBuilder buf = new StringBuilder();
for (String shardUrl : shardUrls) {
if (buf.length() > 0) {
buf.append(",");
}
buf.append(shardUrl);
}
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.add("qt", "/stream");
solrParams.add("expr", "sql("+COLLECTIONORALIAS+", stmt=\"select id from collection1 order by a_i asc\")");
SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams);
solrStream.setStreamContext(streamContext);
tuples = getTuples(solrStream);
assert (tuples.size() == 5);
assertOrder(tuples, 0, 1, 2, 3, 4);
} finally {
solrClientCache.close();
}
}