mirror of https://github.com/apache/lucene.git
SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies
This commit is contained in:
parent 6e21cb3aa9
commit 124be4e202
solr/CHANGES.txt

@@ -312,6 +312,8 @@ Other Changes
 
 * LUCENE-8456: Upgrade Apache Commons Compress to v1.18 (Steve Rowe)
 
+* SOLR-12014: Cryptic error message when creating a collection with sharding that violates autoscaling policies (noble)
+
 ================== 7.4.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Overseer.java

@@ -54,6 +54,7 @@ import org.apache.solr.core.CloudConfig;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.handler.admin.CollectionsHandler;
 import org.apache.solr.handler.component.ShardHandler;
+import org.apache.solr.logging.MDCLoggingContext;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;

@@ -125,6 +126,7 @@ public class Overseer implements SolrCloseable {
 
     @Override
     public void run() {
+      MDCLoggingContext.setNode(zkController.getNodeName() );
 
       LeaderStatus isLeader = amILeader();
       while (isLeader == LeaderStatus.DONT_KNOW) {

@@ -523,6 +525,9 @@ public class Overseer implements SolrCloseable {
   }
 
   public synchronized void start(String id) {
+    MDCLoggingContext.setNode(zkController == null ?
+        null :
+        zkController.getNodeName());
     this.id = id;
     closed = false;
     doClose();
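The Overseer change above stamps the node name into the SLF4J MDC so that every log line written by overseer threads can be attributed to a node. Below is a minimal sketch of the underlying mechanism using plain SLF4J MDC rather than Solr's MDCLoggingContext; the key name, the "n:" prefix and the log-pattern remark are illustrative assumptions (the prefix convention itself is visible in the Utils.getMDCNode() hunk further down).

```java
import java.lang.invoke.MethodHandles;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

// Minimal sketch (not Solr code): tag the current thread with a node name via the
// SLF4J MDC, the same mechanism MDCLoggingContext.setNode() relies on.
public class MdcNodeTagExample {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static void main(String[] args) {
    // Hypothetical key/value; Solr stores the node name with an "n:" prefix,
    // which Utils.getMDCNode() (added in this commit) strips off again.
    MDC.put("node_name", "n:127.0.0.1:8983_solr");
    try {
      // With a pattern such as %X{node_name} in the logging config, this line carries the node.
      log.info("overseer work item processed");
    } finally {
      MDC.remove("node_name"); // MDC is thread-local; clean up so pooled threads don't leak it
    }
  }
}
```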
OverseerTaskProcessor.java

@@ -43,6 +43,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.util.Utils;
+import org.apache.solr.logging.MDCLoggingContext;
 import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;

@@ -121,6 +122,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
 
   private OverseerNodePrioritizer prioritizer;
 
+  private String thisNode;
+
   public OverseerTaskProcessor(ZkStateReader zkStateReader, String myId,
                                         Stats stats,
                                         OverseerMessageHandlerSelector selector,

@@ -141,10 +144,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
     this.runningZKTasks = new HashSet<>();
     this.runningTasks = new HashSet<>();
     this.completedTasks = new HashMap<>();
+    thisNode = Utils.getMDCNode();
   }
 
   @Override
   public void run() {
+    MDCLoggingContext.setNode(thisNode);
     log.debug("Process current queue of overseer operations");
     LeaderStatus isLeader = amILeader();
     while (isLeader == LeaderStatus.DONT_KNOW) {
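OverseerTaskProcessor has to deal with the fact that MDC values are thread-local: it captures the node via Utils.getMDCNode() on the thread that constructs it and re-applies it at the top of run(), which executes on a different thread. A plain-JDK/SLF4J sketch of that capture-and-restore pattern (class and key names are hypothetical):

```java
import org.slf4j.MDC;

// Sketch of the capture-and-restore pattern used by OverseerTaskProcessor:
// an MDC value set on the constructing thread is not visible inside run()
// unless it is captured and re-applied on the worker thread.
public class NodeAwareTask implements Runnable {
  private static final String KEY = "node_name"; // illustrative key, not Solr's exact constant
  private final String node;

  public NodeAwareTask() {
    this.node = MDC.get(KEY); // capture on the submitting thread (may be null)
  }

  @Override
  public void run() {
    if (node != null) MDC.put(KEY, node); // restore on the worker thread
    try {
      // ... do the work; log lines emitted here now carry the originating node ...
    } finally {
      MDC.remove(KEY);
    }
  }
}
```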
Assign.java

@@ -31,12 +31,12 @@ import java.util.Set;
 import java.util.stream.Collectors;
 
 import com.google.common.collect.ImmutableMap;
+import org.apache.solr.client.solrj.cloud.DistribStateManager;
+import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
-import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
-import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
 import org.apache.solr.cloud.rule.ReplicaAssigner;
 import org.apache.solr.cloud.rule.Rule;

@@ -402,7 +402,7 @@ public class Assign {
           nodesList);
       return replicaPositions;
     } catch (Exception e) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations", e);
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting replica locations : " + e.getMessage(), e);
     } finally {
       if (log.isTraceEnabled()) {
         if (replicaPositions != null)
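The one-line Assign change addresses the "cryptic" half of the problem on the server side of the call chain: the wrapping SolrException now repeats the cause's message, so a caller that only ever sees the outer message still learns why replica placement failed. A generic sketch of the same wrapping style, with hypothetical exception types:

```java
// Generic sketch of the wrapping style Assign switches to: include the cause's
// message in the wrapper so callers that only see the outer message get the reason.
public class WrapWithCause {
  static void assignReplicas() throws Exception {
    try {
      computePlacements(); // hypothetical step that may fail
    } catch (Exception e) {
      // Before: new RuntimeException("Error getting replica locations", e) -- cause text hidden.
      // After:  surface e.getMessage() directly in the wrapper's message.
      throw new RuntimeException("Error getting replica locations : " + e.getMessage(), e);
    }
  }

  static void computePlacements() {
    throw new IllegalStateException("No node can satisfy the rules");
  }

  public static void main(String[] args) {
    try {
      assignReplicas();
    } catch (Exception e) {
      // Prints: Error getting replica locations : No node can satisfy the rules
      System.out.println(e.getMessage());
    }
  }
}
```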
TestPolicyCloud.java

@@ -17,6 +17,7 @@
 package org.apache.solr.cloud.autoscaling;
 
 import java.io.IOException;
+import java.io.StringWriter;
 import java.lang.invoke.MethodHandles;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;

@@ -36,6 +37,7 @@ import org.apache.solr.client.solrj.cloud.DistributedQueueFactory;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
+import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
 import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
 import org.apache.solr.client.solrj.cloud.autoscaling.Row;
 import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type;

@@ -88,14 +90,20 @@ public class TestPolicyCloud extends SolrCloudTestCase {
     String commands = "{ set-cluster-policy: [ {cores: '0', node: '#ANY'} ] }"; // disallow replica placement anywhere
     cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
     String collectionName = "testCreateCollection";
-    expectThrows(HttpSolrClient.RemoteSolrException.class,
+    HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class,
         () -> CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient()));
 
+    assertTrue(exp.getMessage().contains("No node can satisfy the rules"));
+    assertTrue(exp.getMessage().contains("AutoScaling.error.diagnostics"));
+    CollectionAdminRequest.deleteCollection(collectionName).processAndWait(cluster.getSolrClient(), 60);
 
     commands = "{ set-cluster-policy: [ {cores: '<2', node: '#ANY'} ] }";
     cluster.getSolrClient().request(createAutoScalingRequest(SolrRequest.METHOD.POST, commands));
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1).process(cluster.getSolrClient());
+    SolrClientCloudManager scm = new SolrClientCloudManager(new ZkDistributedQueueFactory(cluster.getSolrClient().getZkStateReader().getZkClient()), cluster.getSolrClient());
+    Policy.Session session = scm.getDistribStateManager().getAutoScalingConfig().getPolicy().createSession(scm);
+    System.out.println(Utils.writeJson(PolicyHelper.getDiagnostics(session), new StringWriter(), true).toString());
 
   }
 
   public void testDataProviderPerReplicaDetails() throws Exception {
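The test now keeps the thrown RemoteSolrException (expectThrows returns it) so it can assert that the client-visible message carries both the human-readable reason and the AutoScaling.error.diagnostics errorId marker. A standalone sketch of that pattern using JUnit's assertThrows (JUnit 4.13+), which behaves like the expectThrows used above; the exception type and message here are stand-ins:

```java
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

// Sketch of the test pattern: capture the exception instead of discarding it,
// then assert on the client-visible message.
public class CapturedExceptionAssertionExample {
  public static void main(String[] args) {
    IllegalStateException exp = assertThrows(IllegalStateException.class, () -> {
      throw new IllegalStateException(
          "No node can satisfy the rules ... errorId : AutoScaling.error.diagnostics.123");
    });
    assertTrue(exp.getMessage().contains("No node can satisfy the rules"));
    assertTrue(exp.getMessage().contains("AutoScaling.error.diagnostics"));
    System.out.println("assertions passed");
  }
}
```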
PolicyHelper.java

@@ -19,22 +19,26 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
 
 
 import java.io.IOException;
+import java.io.StringWriter;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumMap;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.Predicate;
+import java.util.function.BiPredicate;
 
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint;
 import org.apache.solr.client.solrj.impl.ClusterStateProvider;
+import org.apache.solr.common.ConditionalMapWriter;
 import org.apache.solr.common.IteratorWriter;
 import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.SolrException;

@@ -51,8 +55,10 @@ import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonList;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.FREEDISK;
+import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
 import static org.apache.solr.common.params.CoreAdminParams.NODE;
+import static org.apache.solr.common.util.Utils.handleExp;
 import static org.apache.solr.common.util.Utils.time;
 import static org.apache.solr.common.util.Utils.timeElapsed;

@@ -162,8 +168,14 @@ public class PolicyHelper {
       }
       SolrRequest op = suggester.getSuggestion();
       if (op == null) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No node can satisfy the rules " +
-            Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true)));
+        String errorId = "AutoScaling.error.diagnostics." + System.nanoTime();
+        Policy.Session sessionCopy = suggester.session;
+        log.error("errorId : " + errorId + " " +
+            handleExp(log, "", () -> Utils.writeJson(getDiagnostics(sessionCopy), new StringWriter(), true).toString()));
+
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, " No node can satisfy the rules " +
+            Utils.toJSONString(Utils.getDeepCopy(session.expandedClauses, 4, true) + " More details from logs in node : "
+                + Utils.getMDCNode() + ", errorId : " + errorId));
       }
       session = suggester.getSession();
       positions.add(new ReplicaPosition(shardName, ++idx, e.getKey(), op.getParams().get(NODE)));
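This hunk is the heart of the fix: when no node can satisfy the policy, PolicyHelper now generates a unique errorId, logs the full per-node diagnostics under that id on the server, and sends the client only a short message plus the id and the node name from Utils.getMDCNode(). A standalone sketch of that correlation pattern (SLF4J only; the diagnostics string and exception type are stand-ins):

```java
import java.lang.invoke.MethodHandles;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Sketch of the errorId correlation pattern PolicyHelper adopts: log the bulky
// diagnostics server-side under a unique id, and hand the client only the id.
public class ErrorIdCorrelationExample {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  static void failPlacement(String diagnosticsJson, String nodeName) {
    String errorId = "AutoScaling.error.diagnostics." + System.nanoTime();
    // Full detail stays in the server log, keyed by errorId.
    log.error("errorId : " + errorId + " " + diagnosticsJson);
    // The client-facing message stays short but tells the operator where to look.
    throw new IllegalStateException("No node can satisfy the rules."
        + " More details from logs in node : " + nodeName + ", errorId : " + errorId);
  }

  public static void main(String[] args) {
    try {
      failPlacement("{\"sortedNodes\":[...]}", "127.0.0.1:8983_solr");
    } catch (IllegalStateException e) {
      System.out.println(e.getMessage());
    }
  }
}
```

The operator can then grep the Overseer node's log for the errorId to retrieve the full diagnostics instead of having it truncated or mangled inside the HTTP error response.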
@@ -184,23 +196,30 @@ public class PolicyHelper {
 
   public static MapWriter getDiagnostics(Policy policy, SolrCloudManager cloudManager) {
     Policy.Session session = policy.createSession(cloudManager);
+    return getDiagnostics(session);
+  }
+
+  public static MapWriter getDiagnostics(Policy.Session session) {
     List<Row> sorted = session.getSortedNodes();
+    Set<String> alreadyWritten = new HashSet<>();
+    BiPredicate<String, Object> p = dedupeKeyPredicate(alreadyWritten)
+        .and(ConditionalMapWriter.NON_NULL_VAL)
+        .and((s, o) -> !(o instanceof Map) || !((Map) o).isEmpty());
+
     return ew -> ew.put("sortedNodes", (IteratorWriter) iw -> {
       for (Row row : sorted) {
         iw.add((MapWriter) ew1 -> {
-          ew1.put("node", row.node).
-              put("isLive", row.isLive);
+          alreadyWritten.clear();
+          ew1.put("node", row.node, p).
+              put("isLive", row.isLive, p);
           for (Cell cell : row.getCells())
-            ew1.put(cell.name, cell.val,
-                (Predicate) o -> o != null && (!(o instanceof Map) || !((Map) o).isEmpty()));
+            ew1.put(cell.name, cell.val, p);
           ew1.put("replicas", row.collectionVsShardVsReplicas);
         });
       }
-    }).put("liveNodes", cloudManager.getClusterStateProvider().getLiveNodes())
+    }).put("liveNodes", session.cloudManager.getClusterStateProvider().getLiveNodes())
         .put("violations", session.getViolations())
         .put("config", session.getPolicy());
 
 
   }
 
   public static List<Suggester.SuggestionInfo> getSuggestions(AutoScalingConfig autoScalingConf, SolrCloudManager cloudManager) {
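The rewritten getDiagnostics is now callable from just a Policy.Session and routes every put through one composed BiPredicate: first-write-wins per key (dedupeKeyPredicate over a HashSet that is cleared for each node row), plus non-null and non-empty-map guards. A plain-JDK sketch of that composition, with hypothetical names, showing why a key can be emitted at most once per row:

```java
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.BiPredicate;

// Plain-JDK sketch (hypothetical names) of the predicate composition used by the
// new getDiagnostics(): first write wins per key, nulls and empty maps are skipped.
public class ConditionalWriteSketch {
  static BiPredicate<String, Object> dedupeKeyPredicate(Set<String> seen) {
    return (k, v) -> seen.add(k); // Set.add returns false the second time a key shows up
  }

  static final BiPredicate<String, Object> NON_NULL_VAL = (k, v) -> v != null;

  public static void main(String[] args) {
    Set<String> seen = new HashSet<>();
    BiPredicate<String, Object> p = dedupeKeyPredicate(seen)
        .and(NON_NULL_VAL)
        .and((k, v) -> !(v instanceof Map) || !((Map<?, ?>) v).isEmpty());

    Map<String, Object> out = new LinkedHashMap<>();
    put(out, "node", "127.0.0.1:8983_solr", p);
    put(out, "freedisk", 834.5, p);
    put(out, "node", "duplicate-from-variables", p); // rejected: key already written
    put(out, "heapUsage", null, p);                  // rejected: null value
    System.out.println(out);                         // {node=127.0.0.1:8983_solr, freedisk=834.5}
  }

  static void put(Map<String, Object> target, String k, Object v, BiPredicate<String, Object> p) {
    if (p.test(k, v)) target.put(k, v); // mirrors MapWriter.EntryWriter.put(k, v, BiPredicate)
  }
}
```

Note that the dedupe check also runs for rejected null values, so the shared predicate intentionally records every key it has seen, mirroring the behavior of the composed predicate in the patch.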
ReplicaInfo.java

@@ -20,14 +20,18 @@ package org.apache.solr.client.solrj.cloud.autoscaling;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.BiPredicate;
 
 import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
 
+import static org.apache.solr.common.ConditionalMapWriter.NON_NULL_VAL;
+import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate;
 import static org.apache.solr.common.cloud.ZkStateReader.LEADER_PROP;
 
 

@@ -87,23 +91,15 @@ public class ReplicaInfo implements MapWriter {
 
   @Override
   public void writeMap(EntryWriter ew) throws IOException {
+    BiPredicate<String, Object> p = dedupeKeyPredicate(new HashSet<>())
+        .and(NON_NULL_VAL);
     ew.put(name, (MapWriter) ew1 -> {
-      for (Map.Entry<String, Object> e : variables.entrySet()) {
-        ew1.put(e.getKey(), e.getValue());
-      }
-      if (core != null && !variables.containsKey(ZkStateReader.CORE_NAME_PROP)) {
-        ew1.put(ZkStateReader.CORE_NAME_PROP, core);
-      }
-      if (shard != null && !variables.containsKey(ZkStateReader.SHARD_ID_PROP)) {
-        ew1.put(ZkStateReader.SHARD_ID_PROP, shard);
-      }
-      if (collection != null && !variables.containsKey(ZkStateReader.COLLECTION_PROP)) {
-        ew1.put(ZkStateReader.COLLECTION_PROP, collection);
-      }
-      if (node != null && !variables.containsKey(ZkStateReader.NODE_NAME_PROP)) {
-        ew1.put(ZkStateReader.NODE_NAME_PROP, node);
-      }
-      if (type != null) ew1.put(ZkStateReader.REPLICA_TYPE, type.toString());
+      ew1.put(ZkStateReader.CORE_NAME_PROP, core, p)
+          .put(ZkStateReader.SHARD_ID_PROP, shard, p)
+          .put(ZkStateReader.COLLECTION_PROP, collection, p)
+          .put(ZkStateReader.NODE_NAME_PROP, node, p)
+          .put(ZkStateReader.REPLICA_TYPE, type.toString(), p);
+      for (Map.Entry<String, Object> e : variables.entrySet()) ew1.put(e.getKey(), e.getValue(), p);
     });
   }
 
V2Request.java

@@ -73,7 +73,7 @@ public class V2Request extends SolrRequest<V2Response> implements MapWriter {
       if (useBinary) {
         new JavaBinCodec().marshal(payload, os);
       } else {
-        Utils.toJSON(payload, os, false);
+        Utils.writeJson(payload, os, false);
       }
     }
 
SchemaRequest.java

@@ -707,7 +707,7 @@ public class SchemaRequest extends AbstractSchemaRequest<SchemaResponse> {
     return new RequestWriter.ContentWriter() {
       @Override
       public void write(OutputStream os) throws IOException {
-        Utils.toJSON(getRequestParameters(),
+        Utils.writeJson(getRequestParameters(),
             os, false);
       }
 
ConditionalMapWriter.java

@@ -18,6 +18,7 @@
 package org.apache.solr.common;
 
 import java.io.IOException;
+import java.util.Set;
 import java.util.function.BiPredicate;
 
 public class ConditionalMapWriter implements MapWriter {

@@ -72,4 +73,10 @@ public class ConditionalMapWriter implements MapWriter {
   public void writeMap(EntryWriter ew) throws IOException {
     delegate.writeMap(new EntryWriterWrapper(ew));
   }
+
+  public static BiPredicate<String, Object> dedupeKeyPredicate(Set<String> keys) {
+    return (k, v) -> keys.add(k);
+  }
+
+  public static final BiPredicate<String, Object> NON_NULL_VAL = (s, o) -> o != null;
 }
MapWriter.java

@@ -23,7 +23,7 @@ import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.function.Predicate;
+import java.util.function.BiPredicate;
 
 import org.apache.solr.common.util.Utils;
 

@@ -103,8 +103,8 @@ public interface MapWriter extends MapSerializable {
       return this;
     }
 
-    default EntryWriter put(String k, Object v, Predicate<Object> p) throws IOException {
-      if (p.test(v)) put(k, v);
+    default EntryWriter put(String k, Object v, BiPredicate<String, Object> p) throws IOException {
+      if (p.test(k,v)) put(k, v);
       return this;
     }
 
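Switching the EntryWriter.put overload from Predicate&lt;Object&gt; to BiPredicate&lt;String, Object&gt; is what makes key-based filtering such as dedupeKeyPredicate expressible at all, since the old predicate never saw the key. A usage sketch of the new overload through the solrj API itself, assuming solr-solrj 7.5+ (i.e. the MapWriter and ConditionalMapWriter additions in this commit) on the classpath:

```java
import java.io.IOException;
import java.io.StringWriter;
import java.util.HashSet;
import java.util.function.BiPredicate;

import org.apache.solr.common.MapWriter;
import org.apache.solr.common.util.Utils;

import static org.apache.solr.common.ConditionalMapWriter.NON_NULL_VAL;
import static org.apache.solr.common.ConditionalMapWriter.dedupeKeyPredicate;

// Sketch: a MapWriter that filters its own entries with the new BiPredicate overload.
// Keys and values below are illustrative, not taken from a real cluster.
public class FilteredMapWriterExample implements MapWriter {
  @Override
  public void writeMap(EntryWriter ew) throws IOException {
    BiPredicate<String, Object> p = dedupeKeyPredicate(new HashSet<>()).and(NON_NULL_VAL);
    ew.put("core", "techproducts_shard1_replica_n1", p)
        .put("core", "value-from-variables", p) // dropped: key already written once
        .put("freedisk", null, p);              // dropped: null value
  }

  public static void main(String[] args) throws Exception {
    // Expected output (modulo whitespace): {"core":"techproducts_shard1_replica_n1"}
    System.out.println(Utils.writeJson(new FilteredMapWriterExample(), new StringWriter(), false).toString());
  }
}
```

This is the same style ReplicaInfo.writeMap now uses: write the well-known properties first, then let the dedupe predicate silently drop any duplicates coming from the variables map.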
Utils.java

@@ -24,6 +24,7 @@ import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.StringReader;
 import java.io.UnsupportedEncodingException;
+import java.io.Writer;
 import java.lang.invoke.MethodHandles;
 import java.net.URL;
 import java.net.URLDecoder;

@@ -40,6 +41,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@@ -55,6 +57,7 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SpecProvider;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkOperation;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.server.ByteBufferInputStream;

@@ -64,6 +67,7 @@ import org.noggit.JSONWriter;
 import org.noggit.ObjectBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static java.util.Collections.unmodifiableList;

@@ -138,15 +142,20 @@ public class Utils {
     return mutable ? result : result instanceof Set ? unmodifiableSet((Set) result) : unmodifiableList((List) result);
   }
 
-  public static void toJSON(Object o, OutputStream os, boolean indent) throws IOException {
-    OutputStreamWriter writer = new OutputStreamWriter(os, UTF_8);
+  public static void writeJson(Object o, OutputStream os, boolean indent) throws IOException {
+    writeJson(o, new OutputStreamWriter(os, UTF_8), indent)
+        .flush();
+  }
+
+  public static Writer writeJson(Object o, Writer writer, boolean indent) throws IOException {
     new SolrJSONWriter(writer)
         .setIndent(indent)
         .writeObj(o)
         .close();
+    writer.flush();
+    return writer;
   }
 
 
   public static byte[] toJSON(Object o) {
     if(o == null) return new byte[0];
     CharArr out = new CharArr();

@@ -531,4 +540,23 @@ public class Utils {
   public static long timeElapsed(TimeSource timeSource, long start, TimeUnit unit) {
     return unit.convert(timeSource.getTimeNs() - NANOSECONDS.convert(start, unit), NANOSECONDS);
   }
 
+  public static String getMDCNode() {
+    String s = MDC.get(ZkStateReader.NODE_NAME_PROP);
+    if (s == null) return null;
+    if (s.startsWith("n:")) {
+      return s.substring(2);
+    } else {
+      return null;
+    }
+  }
+
+  public static <T> T handleExp(Logger logger, T def, Callable<T> c) {
+    try {
+      return c.call();
+    } catch (Exception e) {
+      logger.error(e.getMessage(), e);
+    }
+    return def;
+  }
 }
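Utils gains three small helpers that the rest of the patch leans on: writeJson (Writer-returning JSON serialization, used for the diagnostics dump), getMDCNode (reads the node name back out of the MDC, stripping the "n:" prefix), and handleExp (runs a Callable, logs any exception through the supplied logger, and returns a default). A short usage sketch, again assuming solr-solrj 7.5+ on the classpath:

```java
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import java.util.Map;

import org.apache.solr.common.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Usage sketch for the new Utils helpers added in this commit.
public class UtilsHelpersExample {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static void main(String[] args) throws Exception {
    // writeJson: serialize an object to a Writer and get the Writer back for chaining.
    Map<String, Object> diagnostics = Collections.singletonMap("violations", Collections.emptyList());
    String json = Utils.writeJson(diagnostics, new StringWriter(), true).toString();
    System.out.println(json);

    // handleExp: run a Callable, log any exception through the supplied logger, fall back to a default.
    String dump = Utils.handleExp(log, "{}",
        () -> Utils.writeJson(diagnostics, new StringWriter(), true).toString());
    System.out.println(dump);

    // getMDCNode: returns the node name previously stored in the MDC by MDCLoggingContext
    // (null here, since nothing on this thread has set it).
    System.out.println(Utils.getMDCNode());
  }
}
```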