SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies

This commit is contained in:
Noble Paul 2018-08-18 00:37:50 +10:00
parent 6e21cb3aa9
commit 124be4e202
12 changed files with 108 additions and 38 deletions

View File

@ -312,6 +312,8 @@ Other Changes
* LUCENE-8456: Upgrade Apache Commons Compress to v1.18 (Steve Rowe)
* SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies (noble)
================== 7.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -54,6 +54,7 @@ import org.apache.solr.core.CloudConfig;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
@ -125,6 +126,7 @@ public class Overseer implements SolrCloseable {
@Override
public void run() {
MDCLoggingContext.setNode(zkController.getNodeName() );
LeaderStatus isLeader = amILeader();
while (isLeader == LeaderStatus.DONT_KNOW) {
@ -523,6 +525,9 @@ public class Overseer implements SolrCloseable {
}
public synchronized void start(String id) {
MDCLoggingContext.setNode(zkController == null ?
null :
zkController.getNodeName());
this.id = id;
closed = false;
doClose();

View File

@ -43,6 +43,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
@ -121,6 +122,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
private OverseerNodePrioritizer prioritizer;
private String thisNode;
public OverseerTaskProcessor(ZkStateReader zkStateReader, String myId,
Stats stats,
OverseerMessageHandlerSelector selector,
@ -141,10 +144,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
this.runningZKTasks = new HashSet<>();
this.runningTasks = new HashSet<>();
this.completedTasks = new HashMap<>();
thisNode = Utils.getMDCNode();
}
@Override
public void run() {
MDCLoggingContext.setNode(thisNode);
log.debug("Process current queue of overseer operations");
LeaderStatus isLeader = amILeader();
while (isLeader == LeaderStatus.DONT_KNOW) {

View File

@ -31,12 +31,12 @@ import java.util.Set;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableMap;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.cloud.rule.ReplicaAssigner;
import org.apache.solr.cloud.rule.Rule;
@ -402,7 +402,7 @@ public class Assign {
nodesList);
return replicaPositions;
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations", e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations : " + e.getMessage(), e);
} finally {
if (log.isTraceEnabled()) {
if (replicaPositions != null)

View File

@ -17,6 +17,7 @@
package org.apache.solr.cloud.autoscaling;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
@ -36,6 +37,7 @@ import org.apache.solr.client.solrj.cloud.DistributedQueueFactory;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.Row;
import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type;
@ -88,14 +90,20 @@ public class TestPolicyCloud extends SolrCloudTestCase {
String commands = "{ set-cluster-policy: [ {cores: '0', node: '#ANY'} ] }"; // disallow replica placement anywhere
cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
String collectionName = "testCreateCollection";
expectThrows(HttpSolrClient.RemoteSolrException.class,
HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class,
() -> CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient()));
assertTrue(exp.getMessage().contains("No node can satisfy the rules"));
assertTrue(exp.getMessage().contains("AutoScaling.error.diagnostics"));
CollectionAdminRequest.deleteCollection(collectionName).processAndWait(cluster.getSolrClient(), 60);
commands = "{ set-cluster-policy: [ {cores: '<2', node: '#ANY'} ] }";
cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient());
SolrClientCloudManager scm = new SolrClientCloudManager(new ZkDistributedQueueFactory(cluster.getSolrClient().getZkStateReader().getZkClient()), cluster.getSolrClient());
Policy.Session session = scm.getDistribStateManager().getAutoScalingConfig().getPolicy().createSession(scm);
System.out.println(Utils.writeJson(PolicyHelper.getDiagnostics(session), new StringWriter(), true).toString());
}
public void testDataProviderPerReplicaDetails() throws Exception {

View File

@ -19,22 +19,26 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.function.BiPredicate;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint;
import org.apache.solr.client.solrj.impl.ClusterStateProvider;
import org.apache.solr.common.ConditionalMapWriter;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
@ -51,8 +55,10 @@ import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.FREEDISK;
import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static org.apache.solr.common.params.CoreAdminParams.NODE;
import static org.apache.solr.common.util.Utils.handleExp;
import static org.apache.solr.common.util.Utils.time;
import static org.apache.solr.common.util.Utils.timeElapsed;
@ -162,8 +168,14 @@ public class PolicyHelper {
}
SolrRequest op = suggester.getSuggestion();
if (op == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No node can satisfy the rules " +
Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true)));
String errorId = "AutoScaling.error.diagnostics." + System.nanoTime();
Policy.Session sessionCopy = suggester.session;
log.error("errorId : " + errorId + " " +
handleExp(log, "", () -> Utils.writeJson(getDiagnostics(sessionCopy), new StringWriter(), true).toString()));
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, " No node can satisfy the rules " +
Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true) + " More details from logs in node : "
+ Utils.getMDCNode() + ", errorId : " + errorId));
}
session = suggester.getSession();
positions.add(new ReplicaPosition(shardName, ++idx, e.getKey(), op.getParams().get(NODE)));
@ -184,23 +196,30 @@ public class PolicyHelper {
/**
 * Convenience overload: creates a fresh {@link Policy.Session} for the given
 * policy against {@code cloudManager} and returns its diagnostics view.
 */
public static MapWriter getDiagnostics(Policy policy, SolrCloudManager cloudManager) {
return getDiagnostics(policy.createSession(cloudManager));
}
public static MapWriter getDiagnostics(Policy.Session session) {
List<Row> sorted = session.getSortedNodes();
Set<String> alreadyWritten = new HashSet<>();
BiPredicate<String, Object> p = dedupeKeyPredicate(alreadyWritten)
.and(ConditionalMapWriter.NON_NULL_VAL)
.and((s, o) -> !(o instanceof Map) || !((Map) o).isEmpty());
return ew -> ew.put("sortedNodes", (IteratorWriter) iw -> {
for (Row row : sorted) {
iw.add((MapWriter) ew1 -> {
ew1.put("node", row.node).
put("isLive", row.isLive);
alreadyWritten.clear();
ew1.put("node", row.node, p).
put("isLive", row.isLive, p);
for (Cell cell : row.getCells())
ew1.put(cell.name, cell.val,
(Predicate) o -> o != null && (!(o instanceof Map) || !((Map) o).isEmpty()));
ew1.put(cell.name, cell.val, p);
ew1.put("replicas", row.collectionVsShardVsReplicas);
});
}
}).put("liveNodes", cloudManager.getClusterStateProvider().getLiveNodes())
}).put("liveNodes", session.cloudManager.getClusterStateProvider().getLiveNodes())
.put("violations", session.getViolations())
.put("config", session.getPolicy());
}
public static List<Suggester.SuggestionInfo> getSuggestions(AutoScalingConfig autoScalingConf, SolrCloudManager cloudManager) {

View File

@ -20,14 +20,18 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.function.BiPredicate;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;
import static org.apache.solr.common.ConditionalMapWriter.NON_NULL_VAL;
import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate;
import static org.apache.solr.common.cloud.ZkStateReader.LEADER_PROP;
@ -87,23 +91,15 @@ public class ReplicaInfo implements MapWriter {
@Override
public void writeMap(EntryWriter ew) throws IOException {
BiPredicate<String, Object> p = dedupeKeyPredicate(new HashSet<>())
.and(NON_NULL_VAL);
ew.put(name, (MapWriter) ew1 -> {
for (Map.Entry<String, Object> e : variables.entrySet()) {
ew1.put(e.getKey(), e.getValue());
}
if (core != null && !variables.containsKey(ZkStateReader.CORE_NAME_PROP)) {
ew1.put(ZkStateReader.CORE_NAME_PROP, core);
}
if (shard != null && !variables.containsKey(ZkStateReader.SHARD_ID_PROP)) {
ew1.put(ZkStateReader.SHARD_ID_PROP, shard);
}
if (collection != null && !variables.containsKey(ZkStateReader.COLLECTION_PROP)) {
ew1.put(ZkStateReader.COLLECTION_PROP, collection);
}
if (node != null && !variables.containsKey(ZkStateReader.NODE_NAME_PROP)) {
ew1.put(ZkStateReader.NODE_NAME_PROP, node);
}
if (type != null) ew1.put(ZkStateReader.REPLICA_TYPE, type.toString());
ew1.put(ZkStateReader.CORE_NAME_PROP, core, p)
.put(ZkStateReader.SHARD_ID_PROP, shard, p)
.put(ZkStateReader.COLLECTION_PROP, collection, p)
.put(ZkStateReader.NODE_NAME_PROP, node, p)
.put(ZkStateReader.REPLICA_TYPE, type.toString(), p);
for (Map.Entry<String, Object> e : variables.entrySet()) ew1.put(e.getKey(), e.getValue(), p);
});
}

View File

@ -73,7 +73,7 @@ public class V2Request extends SolrRequest<V2Response> implements MapWriter {
if (useBinary) {
new JavaBinCodec().marshal(payload, os);
} else {
Utils.toJSON(payload, os, false);
Utils.writeJson(payload, os, false);
}
}

View File

@ -707,7 +707,7 @@ public class SchemaRequest extends AbstractSchemaRequest<SchemaResponse> {
return new RequestWriter.ContentWriter() {
@Override
public void write(OutputStream os) throws IOException {
Utils.toJSON(getRequestParameters(),
Utils.writeJson(getRequestParameters(),
os, false);
}

View File

@ -18,6 +18,7 @@
package org.apache.solr.common;
import java.io.IOException;
import java.util.Set;
import java.util.function.BiPredicate;
public class ConditionalMapWriter implements MapWriter {
@ -72,4 +73,10 @@ public class ConditionalMapWriter implements MapWriter {
// Writes the delegate's entries through an EntryWriterWrapper —
// presumably the wrapper applies this writer's condition to each entry;
// confirm against the EntryWriterWrapper definition earlier in this class.
public void writeMap(EntryWriter ew) throws IOException {
delegate.writeMap(new EntryWriterWrapper(ew));
}
// Returns a predicate that accepts a key only the first time it is seen:
// Set#add returns true when the key was absent and false on repeats, so
// duplicates (within the lifetime of the supplied set) are rejected.
// The value argument is ignored.
public static BiPredicate<String, Object> dedupeKeyPredicate(Set<String> keys) {
return (k, v) -> keys.add(k);
}
// Accepts an entry only when its value is non-null; the key is ignored.
public static final BiPredicate<String, Object> NON_NULL_VAL = (s, o) -> o != null;
}

View File

@ -23,7 +23,7 @@ import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.function.BiPredicate;
import org.apache.solr.common.util.Utils;
@ -103,8 +103,8 @@ public interface MapWriter extends MapSerializable {
return this;
}
default EntryWriter put(String k, Object v, Predicate<Object> p) throws IOException {
if (p.test(v)) put(k, v);
// Conditional write: the (k, v) entry is emitted only when the predicate
// accepts the pair; in either case the writer is returned for chaining.
default EntryWriter put(String k, Object v, BiPredicate<String, Object> p) throws IOException {
if (p.test(k,v)) put(k, v);
return this;
}

View File

@ -24,6 +24,7 @@ import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.net.URLDecoder;
@ -40,6 +41,7 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -55,6 +57,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.SpecProvider;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkOperation;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.server.ByteBufferInputStream;
@ -64,6 +67,7 @@ import org.noggit.JSONWriter;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Collections.unmodifiableList;
@ -138,15 +142,20 @@ public class Utils {
return mutable ? result : result instanceof Set ? unmodifiableSet((Set) result) : unmodifiableList((List) result);
}
public static void toJSON(Object o, OutputStream os, boolean indent) throws IOException {
OutputStreamWriter writer = new OutputStreamWriter(os, UTF_8);
/**
 * Serializes {@code o} as JSON to the stream using UTF-8, optionally
 * indented, then flushes the intermediate writer so all bytes reach
 * {@code os}. Delegates to the {@code Writer}-based overload.
 */
public static void writeJson(Object o, OutputStream os, boolean indent) throws IOException {
writeJson(o, new OutputStreamWriter(os, UTF_8), indent)
.flush();
}
/**
 * Serializes {@code o} as JSON onto the supplied writer and returns the
 * same writer so callers can chain further operations (e.g. flush).
 * NOTE(review): flush-before-return appears here alongside the return —
 * verify against the committed file whether the flush line belongs to the
 * old implementation (this text is a diff rendering without +/- markers).
 */
public static Writer writeJson(Object o, Writer writer, boolean indent) throws IOException {
new SolrJSONWriter(writer)
.setIndent(indent)
.writeObj(o)
.close();
writer.flush();
return writer;
}
public static byte[] toJSON(Object o) {
if(o == null) return new byte[0];
CharArr out = new CharArr();
@ -531,4 +540,23 @@ public class Utils {
public static long timeElapsed(TimeSource timeSource, long start, TimeUnit unit) {
return unit.convert(timeSource.getTimeNs() - NANOSECONDS.convert(start, unit), NANOSECONDS);
}
/**
 * Node name recorded in the SLF4J MDC, or null when absent.
 * The MDC entry carries an "n:" prefix, which is stripped before returning;
 * an entry without that prefix also yields null.
 */
public static String getMDCNode() {
String mdcValue = MDC.get(ZkStateReader.NODE_NAME_PROP);
if (mdcValue != null && mdcValue.startsWith("n:")) {
return mdcValue.substring(2);
}
return null;
}
/**
 * Invokes {@code c} and returns its result. If the callable throws, the
 * exception is logged on {@code logger} and the fallback {@code def} is
 * returned instead — the exception is never propagated to the caller.
 */
public static <T> T handleExp(Logger logger, T def, Callable<T> c) {
try {
return c.call();
} catch (Exception e) {
logger.error(e.getMessage(), e);
return def;
}
}
}