From 124be4e202bcc4934f70683ee3d3ec03e9c85cbe Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Sat, 18 Aug 2018 00:37:50 +1000 Subject: [PATCH] SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies --- solr/CHANGES.txt | 2 + .../java/org/apache/solr/cloud/Overseer.java | 5 +++ .../solr/cloud/OverseerTaskProcessor.java | 5 +++ .../solr/cloud/api/collections/Assign.java | 6 +-- .../cloud/autoscaling/TestPolicyCloud.java | 10 ++++- .../solrj/cloud/autoscaling/PolicyHelper.java | 39 ++++++++++++++----- .../solrj/cloud/autoscaling/ReplicaInfo.java | 28 ++++++------- .../solr/client/solrj/request/V2Request.java | 2 +- .../solrj/request/schema/SchemaRequest.java | 2 +- .../solr/common/ConditionalMapWriter.java | 7 ++++ .../org/apache/solr/common/MapWriter.java | 6 +-- .../org/apache/solr/common/util/Utils.java | 34 ++++++++++++++-- 12 files changed, 108 insertions(+), 38 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 29a5b527711..70f1d8a88ab 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -312,6 +312,8 @@ Other Changes * LUCENE-8456: Upgrade Apache Commons Compress to v1.18 (Steve Rowe) +* SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies (noble) + ================== 7.4.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 8c7e9646978..74781d7ebb1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -54,6 +54,7 @@ import org.apache.solr.core.CloudConfig; import org.apache.solr.core.CoreContainer; import org.apache.solr.handler.admin.CollectionsHandler; import org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.update.UpdateShardHandler; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; @@ -125,6 +126,7 @@ public class Overseer implements SolrCloseable { @Override public void run() { + MDCLoggingContext.setNode(zkController.getNodeName() ); LeaderStatus isLeader = amILeader(); while (isLeader == LeaderStatus.DONT_KNOW) { @@ -523,6 +525,9 @@ public class Overseer implements SolrCloseable { } public synchronized void start(String id) { + MDCLoggingContext.setNode(zkController == null ? + null : + zkController.getNodeName()); this.id = id; closed = false; doClose(); diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java index 01afc431cbf..febeec04eee 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java @@ -43,6 +43,7 @@ import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.Utils; +import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; @@ -121,6 +122,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable { private OverseerNodePrioritizer prioritizer; + private String thisNode; + public OverseerTaskProcessor(ZkStateReader zkStateReader, String myId, Stats stats, OverseerMessageHandlerSelector selector, @@ -141,10 +144,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable { this.runningZKTasks = new HashSet<>(); this.runningTasks = new HashSet<>(); this.completedTasks = new HashMap<>(); + thisNode = Utils.getMDCNode(); } @Override public void run() { + MDCLoggingContext.setNode(thisNode); log.debug("Process current queue of overseer operations"); LeaderStatus isLeader = amILeader(); while (isLeader == LeaderStatus.DONT_KNOW) { diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java index b580bd31bd8..2541faab6a1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/Assign.java @@ -31,12 +31,12 @@ import java.util.Set; import java.util.stream.Collectors; import com.google.common.collect.ImmutableMap; +import org.apache.solr.client.solrj.cloud.DistribStateManager; +import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException; -import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; -import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData; import org.apache.solr.cloud.rule.ReplicaAssigner; import org.apache.solr.cloud.rule.Rule; @@ -402,7 +402,7 @@ public class Assign { nodesList); return replicaPositions; } catch (Exception e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations", e); + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations : " + e.getMessage(), e); } finally { if (log.isTraceEnabled()) { if (replicaPositions != null) diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java index 88e2b704333..c626223cb0b 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TestPolicyCloud.java @@ -17,6 +17,7 @@ package org.apache.solr.cloud.autoscaling; import java.io.IOException; +import java.io.StringWriter; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -36,6 +37,7 @@ import org.apache.solr.client.solrj.cloud.DistributedQueueFactory; import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; import org.apache.solr.client.solrj.cloud.autoscaling.Policy; +import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo; import org.apache.solr.client.solrj.cloud.autoscaling.Row; import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type; @@ -88,14 +90,20 @@ public class TestPolicyCloud extends SolrCloudTestCase { String commands = "{ set-cluster-policy: [ {cores: '0', node: '#ANY'} ] }"; // disallow replica placement anywhere cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands)); String collectionName = "testCreateCollection"; - expectThrows(HttpSolrClient.RemoteSolrException.class, + HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class, () -> CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient())); + assertTrue(exp.getMessage().contains("No node can satisfy the rules")); + assertTrue(exp.getMessage().contains("AutoScaling.error.diagnostics")); CollectionAdminRequest.deleteCollection(collectionName).processAndWait(cluster.getSolrClient(), 60); commands = "{ set-cluster-policy: [ {cores: '<2', node: '#ANY'} ] }"; cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands)); CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient()); + SolrClientCloudManager scm = new SolrClientCloudManager(new ZkDistributedQueueFactory(cluster.getSolrClient().getZkStateReader().getZkClient()), cluster.getSolrClient()); + Policy.Session session = scm.getDistribStateManager().getAutoScalingConfig().getPolicy().createSession(scm); + System.out.println(Utils.writeJson(PolicyHelper.getDiagnostics(session), new StringWriter(), true).toString()); + } public void testDataProviderPerReplicaDetails() throws Exception { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java index 8f027a0bef1..d052d6fdc0c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java @@ -19,22 +19,26 @@ package org.apache.solr.client.solrj.cloud.autoscaling; import java.io.IOException; +import java.io.StringWriter; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collections; import java.util.EnumMap; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Predicate; +import java.util.function.BiPredicate; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint; import org.apache.solr.client.solrj.impl.ClusterStateProvider; +import org.apache.solr.common.ConditionalMapWriter; import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.MapWriter; import org.apache.solr.common.SolrException; @@ -51,8 +55,10 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.FREEDISK; +import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate; import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA; import static org.apache.solr.common.params.CoreAdminParams.NODE; +import static org.apache.solr.common.util.Utils.handleExp; import static org.apache.solr.common.util.Utils.time; import static org.apache.solr.common.util.Utils.timeElapsed; @@ -162,8 +168,14 @@ public class PolicyHelper { } SolrRequest op = suggester.getSuggestion(); if (op == null) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No node can satisfy the rules " + - Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true))); + String errorId = "AutoScaling.error.diagnostics." + System.nanoTime(); + Policy.Session sessionCopy = suggester.session; + log.error("errorId : " + errorId + " " + + handleExp(log, "", () -> Utils.writeJson(getDiagnostics(sessionCopy), new StringWriter(), true).toString())); + + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, " No node can satisfy the rules " + + Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true) + " More details from logs in node : " + + Utils.getMDCNode() + ", errorId : " + errorId)); } session = suggester.getSession(); positions.add(new ReplicaPosition(shardName, ++idx, e.getKey(), op.getParams().get(NODE))); @@ -184,23 +196,30 @@ public class PolicyHelper { public static MapWriter getDiagnostics(Policy policy, SolrCloudManager cloudManager) { Policy.Session session = policy.createSession(cloudManager); + return getDiagnostics(session); + } + + public static MapWriter getDiagnostics(Policy.Session session) { List sorted = session.getSortedNodes(); + Set alreadyWritten = new HashSet<>(); + BiPredicate p = dedupeKeyPredicate(alreadyWritten) + .and(ConditionalMapWriter.NON_NULL_VAL) + .and((s, o) -> !(o instanceof Map) || !((Map) o).isEmpty()); + return ew -> ew.put("sortedNodes", (IteratorWriter) iw -> { for (Row row : sorted) { iw.add((MapWriter) ew1 -> { - ew1.put("node", row.node). - put("isLive", row.isLive); + alreadyWritten.clear(); + ew1.put("node", row.node, p). + put("isLive", row.isLive, p); for (Cell cell : row.getCells()) - ew1.put(cell.name, cell.val, - (Predicate) o -> o != null && (!(o instanceof Map) || !((Map) o).isEmpty())); + ew1.put(cell.name, cell.val, p); ew1.put("replicas", row.collectionVsShardVsReplicas); }); } - }).put("liveNodes", cloudManager.getClusterStateProvider().getLiveNodes()) + }).put("liveNodes", session.cloudManager.getClusterStateProvider().getLiveNodes()) .put("violations", session.getViolations()) .put("config", session.getPolicy()); - - } public static List getSuggestions(AutoScalingConfig autoScalingConf, SolrCloudManager cloudManager) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java index 25f12bde935..97f2521145b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java @@ -20,14 +20,18 @@ package org.apache.solr.client.solrj.cloud.autoscaling; import java.io.IOException; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; import java.util.Map; +import java.util.function.BiPredicate; import org.apache.solr.common.MapWriter; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.Utils; +import static org.apache.solr.common.ConditionalMapWriter.NON_NULL_VAL; +import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate; import static org.apache.solr.common.cloud.ZkStateReader.LEADER_PROP; @@ -87,23 +91,15 @@ public class ReplicaInfo implements MapWriter { @Override public void writeMap(EntryWriter ew) throws IOException { + BiPredicate p = dedupeKeyPredicate(new HashSet<>()) + .and(NON_NULL_VAL); ew.put(name, (MapWriter) ew1 -> { - for (Map.Entry e : variables.entrySet()) { - ew1.put(e.getKey(), e.getValue()); - } - if (core != null && !variables.containsKey(ZkStateReader.CORE_NAME_PROP)) { - ew1.put(ZkStateReader.CORE_NAME_PROP, core); - } - if (shard != null && !variables.containsKey(ZkStateReader.SHARD_ID_PROP)) { - ew1.put(ZkStateReader.SHARD_ID_PROP, shard); - } - if (collection != null && !variables.containsKey(ZkStateReader.COLLECTION_PROP)) { - ew1.put(ZkStateReader.COLLECTION_PROP, collection); - } - if (node != null && !variables.containsKey(ZkStateReader.NODE_NAME_PROP)) { - ew1.put(ZkStateReader.NODE_NAME_PROP, node); - } - if (type != null) ew1.put(ZkStateReader.REPLICA_TYPE, type.toString()); + ew1.put(ZkStateReader.CORE_NAME_PROP, core, p) + .put(ZkStateReader.SHARD_ID_PROP, shard, p) + .put(ZkStateReader.COLLECTION_PROP, collection, p) + .put(ZkStateReader.NODE_NAME_PROP, node, p) + .put(ZkStateReader.REPLICA_TYPE, type.toString(), p); + for (Map.Entry e : variables.entrySet()) ew1.put(e.getKey(), e.getValue(), p); }); } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java index c858f7b9f6c..17e3c9bf7f5 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java @@ -73,7 +73,7 @@ public class V2Request extends SolrRequest implements MapWriter { if (useBinary) { new JavaBinCodec().marshal(payload, os); } else { - Utils.toJSON(payload, os, false); + Utils.writeJson(payload, os, false); } } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/schema/SchemaRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/schema/SchemaRequest.java index 6add379c243..2e94de87bc8 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/schema/SchemaRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/schema/SchemaRequest.java @@ -707,7 +707,7 @@ public class SchemaRequest extends AbstractSchemaRequest { return new RequestWriter.ContentWriter() { @Override public void write(OutputStream os) throws IOException { - Utils.toJSON(getRequestParameters(), + Utils.writeJson(getRequestParameters(), os, false); } diff --git a/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java b/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java index 9a1106f4f09..706414d3bc6 100644 --- a/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java @@ -18,6 +18,7 @@ package org.apache.solr.common; import java.io.IOException; +import java.util.Set; import java.util.function.BiPredicate; public class ConditionalMapWriter implements MapWriter { @@ -72,4 +73,10 @@ public class ConditionalMapWriter implements MapWriter { public void writeMap(EntryWriter ew) throws IOException { delegate.writeMap(new EntryWriterWrapper(ew)); } + + public static BiPredicate dedupeKeyPredicate(Set keys) { + return (k, v) -> keys.add(k); + } + + public static final BiPredicate NON_NULL_VAL = (s, o) -> o != null; } diff --git a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java index 56f3b8d1b94..aa8ab381046 100644 --- a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java @@ -23,7 +23,7 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.function.Predicate; +import java.util.function.BiPredicate; import org.apache.solr.common.util.Utils; @@ -103,8 +103,8 @@ public interface MapWriter extends MapSerializable { return this; } - default EntryWriter put(String k, Object v, Predicate p) throws IOException { - if (p.test(v)) put(k, v); + default EntryWriter put(String k, Object v, BiPredicate p) throws IOException { + if (p.test(k,v)) put(k, v); return this; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java index ca76c4cb972..c8105e18400 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java @@ -24,6 +24,7 @@ import java.io.OutputStreamWriter; import java.io.Reader; import java.io.StringReader; import java.io.UnsupportedEncodingException; +import java.io.Writer; import java.lang.invoke.MethodHandles; import java.net.URL; import java.net.URLDecoder; @@ -40,6 +41,7 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -55,6 +57,7 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.SpecProvider; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkOperation; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CommonParams; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.server.ByteBufferInputStream; @@ -64,6 +67,7 @@ import org.noggit.JSONWriter; import org.noggit.ObjectBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Collections.unmodifiableList; @@ -138,15 +142,20 @@ public class Utils { return mutable ? result : result instanceof Set ? unmodifiableSet((Set) result) : unmodifiableList((List) result); } - public static void toJSON(Object o, OutputStream os, boolean indent) throws IOException { - OutputStreamWriter writer = new OutputStreamWriter(os, UTF_8); + public static void writeJson(Object o, OutputStream os, boolean indent) throws IOException { + writeJson(o, new OutputStreamWriter(os, UTF_8), indent) + .flush(); + } + + public static Writer writeJson(Object o, Writer writer, boolean indent) throws IOException { new SolrJSONWriter(writer) .setIndent(indent) .writeObj(o) .close(); - writer.flush(); + return writer; } + public static byte[] toJSON(Object o) { if(o == null) return new byte[0]; CharArr out = new CharArr(); @@ -531,4 +540,23 @@ public class Utils { public static long timeElapsed(TimeSource timeSource, long start, TimeUnit unit) { return unit.convert(timeSource.getTimeNs() - NANOSECONDS.convert(start, unit), NANOSECONDS); } + + public static String getMDCNode() { + String s = MDC.get(ZkStateReader.NODE_NAME_PROP); + if (s == null) return null; + if (s.startsWith("n:")) { + return s.substring(2); + } else { + return null; + } + } + + public static T handleExp(Logger logger, T def, Callable c) { + try { + return c.call(); + } catch (Exception e) { + logger.error(e.getMessage(), e); + } + return def; + } }