SOLR-13427: Support simulating the execution of autoscaling suggestions.

This commit is contained in:
Andrzej Bialecki 2019-04-29 23:14:38 +02:00
parent 94b9f7ed1c
commit 44efae15a9
36 changed files with 778 additions and 476 deletions

View File

@ -107,6 +107,8 @@ New Features
* SOLR-13391: Add variance and standard deviation stream evaluators (Nazerke Seidan, Joel Bernstein)
* SOLR-13427: Support simulating the execution of autoscaling suggestions. (ab)
Bug Fixes
----------------------

View File

@ -706,7 +706,7 @@ function run_tool() {
"$JAVA" $SOLR_SSL_OPTS $AUTHC_OPTS $SOLR_ZK_CREDS_AND_ACLS -Dsolr.install.dir="$SOLR_TIP" \
-Dlog4j.configurationFile="file:$DEFAULT_SERVER_DIR/resources/log4j2-console.xml" \
-classpath "$DEFAULT_SERVER_DIR/solr-webapp/webapp/WEB-INF/lib/*:$DEFAULT_SERVER_DIR/lib/ext/*" \
-classpath "$DEFAULT_SERVER_DIR/solr-webapp/webapp/WEB-INF/lib/*:$DEFAULT_SERVER_DIR/lib/ext/*:$DEFAULT_SERVER_DIR/lib/*" \
org.apache.solr.util.SolrCLI "$@"
return $?

View File

@ -19,15 +19,23 @@ package org.apache.solr.cloud;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.CollectionStatePredicate;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
@ -36,6 +44,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -44,6 +53,7 @@ import org.slf4j.LoggerFactory;
public class CloudUtil {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final int DEFAULT_TIMEOUT = 90;
/**
* See if coreNodeName has been taken over by another baseUrl and unload core
@ -142,4 +152,134 @@ public class CloudUtil {
}
/**
* Wait for a particular collection state to appear.
*
* This is a convenience method using the {@link #DEFAULT_TIMEOUT}
*
* @param cloudManager current instance of {@link SolrCloudManager}
* @param message a message to report on failure
* @param collection the collection to watch
* @param predicate a predicate to match against the collection state
*/
public static long waitForState(final SolrCloudManager cloudManager,
final String message,
final String collection,
final CollectionStatePredicate predicate) {
AtomicReference<DocCollection> state = new AtomicReference<>();
AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
try {
return waitForState(cloudManager, collection, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> {
state.set(c);
liveNodesLastSeen.set(n);
return predicate.matches(n, c);
});
} catch (Exception e) {
throw new AssertionError(message + "\n" + "Live Nodes: " + liveNodesLastSeen.get() + "\nLast available state: " + state.get(), e);
}
}
/**
* Wait for a particular collection state to appear.
*
* This is a convenience method using the {@link #DEFAULT_TIMEOUT}
*
* @param cloudManager current instance of {@link SolrCloudManager}
* @param collection the collection to watch
* @param wait timeout value
* @param unit timeout unit
* @param predicate a predicate to match against the collection state
*/
public static long waitForState(final SolrCloudManager cloudManager,
final String collection,
long wait,
final TimeUnit unit,
final CollectionStatePredicate predicate) throws InterruptedException, TimeoutException, IOException {
TimeOut timeout = new TimeOut(wait, unit, cloudManager.getTimeSource());
long timeWarn = timeout.timeLeft(TimeUnit.MILLISECONDS) / 4;
ClusterState state = null;
DocCollection coll = null;
while (!timeout.hasTimedOut()) {
state = cloudManager.getClusterStateProvider().getClusterState();
coll = state.getCollectionOrNull(collection);
// due to the way we manage collections in SimClusterStateProvider a null here
// can mean that a collection is still being created but has no replicas
if (coll == null) { // does not yet exist?
timeout.sleep(100);
continue;
}
if (predicate.matches(state.getLiveNodes(), coll)) {
log.trace("-- predicate matched with state {}", state);
return timeout.timeElapsed(TimeUnit.MILLISECONDS);
}
timeout.sleep(100);
if (timeout.timeLeft(TimeUnit.MILLISECONDS) < timeWarn) {
log.trace("-- still not matching predicate: {}", state);
}
}
throw new TimeoutException("last ClusterState: " + state + ", last coll state: " + coll);
}
/**
* Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
* number of active shards and replicas
* @param expectedShards expected number of active shards
* @param expectedReplicas expected number of active replicas
*/
public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas) {
return clusterShape(expectedShards, expectedReplicas, false, false);
}
/**
* Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
* number of shards and replicas.
* <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
* {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
* @param expectedShards expected number of shards
* @param expectedReplicas expected number of active replicas
* @param withInactive if true then count also inactive shards
* @param requireLeaders if true then require that each shard has a leader
*/
public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas, boolean withInactive,
boolean requireLeaders) {
return (liveNodes, collectionState) -> {
if (collectionState == null) {
log.info("-- null collection");
return false;
}
Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
if (slices.size() != expectedShards) {
log.info("-- wrong number of slices for collection {}, expected={}, found={}: {}", collectionState.getName(), expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
return false;
}
Set<String> leaderless = new HashSet<>();
for (Slice slice : slices) {
int activeReplicas = 0;
if (requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null) {
leaderless.add(slice.getName());
continue;
}
// skip other checks, we're going to fail anyway
if (!leaderless.isEmpty()) {
continue;
}
for (Replica replica : slice) {
if (replica.isActive(liveNodes))
activeReplicas++;
}
if (activeReplicas != expectedReplicas) {
log.info("-- wrong number of active replicas for collection {} in slice {}, expected={}, found={}", collectionState.getName(), slice.getName(), expectedReplicas, activeReplicas);
return false;
}
}
if (leaderless.isEmpty()) {
return true;
} else {
log.info("-- shards without leaders: {}", leaderless);
return false;
}
};
}
}

View File

@ -51,6 +51,8 @@ import org.apache.solr.update.SolrIndexSplitter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.CORE_IDX;
/**
*
*/
@ -284,7 +286,7 @@ public class IndexSizeTrigger extends TriggerBase {
replicaName = info.getName(); // which is actually coreNode name...
}
String registry = SolrCoreMetricManager.createRegistryName(true, coll, sh, replicaName, null);
String tag = "metrics:" + registry + ":INDEX.sizeInBytes";
String tag = "metrics:" + registry + ":" + CORE_IDX.metricsAttribute;
metricTags.put(tag, info);
tag = "metrics:" + registry + ":SEARCHER.searcher.numDocs";
metricTags.put(tag, info);

View File

@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -39,7 +40,6 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import com.carrotsearch.randomizedtesting.RandomizedContext;
import com.codahale.metrics.jvm.ClassLoadingGaugeSet;
import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
@ -57,9 +57,11 @@ import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
import org.apache.solr.client.solrj.impl.ClusterStateProvider;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CollectionApiMapping;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.request.V2Request;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.client.solrj.response.SolrResponseBase;
import org.apache.solr.client.solrj.response.UpdateResponse;
@ -111,6 +113,17 @@ import static org.apache.solr.cloud.api.collections.OverseerCollectionMessageHan
public class SimCloudManager implements SolrCloudManager {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final Random random;
static {
String seed = System.getProperty("tests.seed");
if (seed == null) {
random = new Random();
} else {
random = new Random(seed.hashCode());
}
}
private final SimDistribStateManager stateManager;
private final SimClusterStateProvider clusterStateProvider;
private final SimNodeStateProvider nodeStateProvider;
@ -125,7 +138,7 @@ public class SimCloudManager implements SolrCloudManager {
private final Map<String, Map<String, AtomicInteger>> eventCounts = new ConcurrentHashMap<>();
private final MockSearchableSolrClient solrClient;
private final Map<String, AtomicLong> opCounts = new ConcurrentSkipListMap<>();
/**
/**
* @see #submit
* @see #getBackgroundTaskFailureCount
* @see LoggingCallable
@ -283,6 +296,25 @@ public class SimCloudManager implements SolrCloudManager {
return cloudManager;
}
public static SimCloudManager createCluster(SolrCloudManager other, TimeSource timeSource) throws Exception {
SimCloudManager cloudManager = new SimCloudManager(timeSource);
cloudManager.getSimClusterStateProvider().copyFrom(other.getClusterStateProvider());
List<String> replicaTags = Arrays.asList(
Variable.Type.CORE_IDX.metricsAttribute,
"QUERY./select.requests",
"UPDATE./update.requests"
);
Set<String> nodeTags = createNodeValues("unused:1234_solr").keySet();
for (String node : other.getClusterStateProvider().getLiveNodes()) {
SimClusterStateProvider simClusterStateProvider = cloudManager.getSimClusterStateProvider();
cloudManager.getSimNodeStateProvider().simSetNodeValues(node, other.getNodeStateProvider().getNodeValues(node, nodeTags));
Map<String, Map<String, List<ReplicaInfo>>> infos = other.getNodeStateProvider().getReplicaInfo(node, replicaTags);
simClusterStateProvider.simSetReplicaValues(node, infos, true);
}
cloudManager.getSimDistribStateManager().copyFrom(other.getDistribStateManager(), false);
return cloudManager;
}
/**
* Create simulated node values (metrics) for a node.
* @param nodeName node name (eg. '127.0.0.1:10000_solr'). If null then a new node name will be
@ -415,7 +447,7 @@ public class SimCloudManager implements SolrCloudManager {
* Get the source of randomness (usually initialized by the test suite).
*/
public Random getRandom() {
return RandomizedContext.current().getRandom();
return random;
}
/**
@ -689,6 +721,13 @@ public class SimCloudManager implements SolrCloudManager {
count.incrementAndGet();
}
private static final Map<String, String> v2v1Mapping = new HashMap<>();
static {
for (CollectionApiMapping.Meta meta : CollectionApiMapping.Meta.values()) {
if (meta.action != null) v2v1Mapping.put(meta.commandName, meta.action.toLower());
}
}
/**
* Handler method for autoscaling requests. NOTE: only a specific subset of autoscaling requests is
* supported!
@ -700,7 +739,7 @@ public class SimCloudManager implements SolrCloudManager {
timeSource.sleep(5);
log.trace("--- got SolrRequest: " + req.getMethod() + " " + req.getPath() +
(req.getParams() != null ? " " + req.getParams().toQueryString() : ""));
(req.getParams() != null ? " " + req.getParams() : ""));
if (req.getPath() != null) {
if (req.getPath().startsWith("/admin/autoscaling") ||
req.getPath().startsWith("/cluster/autoscaling") ||
@ -789,27 +828,56 @@ public class SimCloudManager implements SolrCloudManager {
}
}
// support only a specific subset of collection admin ops
if (!(req instanceof CollectionAdminRequest)) {
throw new UnsupportedOperationException("Only some CollectionAdminRequest-s are supported: " + req.getClass().getName());
}
metricManager.registry("solr.node").counter("ADMIN." + req.getPath() + ".requests").inc();
SolrParams params = req.getParams();
String a = params.get(CoreAdminParams.ACTION);
String a = params != null ? params.get(CoreAdminParams.ACTION) : null;
SolrResponse rsp = new SolrResponseBase();
rsp.setResponse(new NamedList<>());
if (!(req instanceof CollectionAdminRequest)) {
// maybe a V2Request?
if (req instanceof V2Request) {
Map<String, Object> reqMap = new HashMap<>();
((V2Request)req).toMap(reqMap);
String path = (String)reqMap.get("path");
if (!path.startsWith("/c/") || path.length() < 4) {
throw new UnsupportedOperationException("Unsupported V2 request path: " + reqMap);
}
Map<String, Object> cmd = (Map<String, Object>)reqMap.get("command");
if (cmd.size() != 1) {
throw new UnsupportedOperationException("Unsupported multi-command V2 request: " + reqMap);
}
a = cmd.keySet().iterator().next();
params = new ModifiableSolrParams();
((ModifiableSolrParams)params).add(CollectionAdminParams.COLLECTION, path.substring(3));
if (req.getParams() != null) {
((ModifiableSolrParams)params).add(req.getParams());
}
Map<String, Object> reqParams = (Map<String, Object>)cmd.get(a);
for (Map.Entry<String, Object> e : reqParams.entrySet()) {
((ModifiableSolrParams)params).add(e.getKey(), e.getValue().toString());
}
// re-map from v2 to v1 action
a = v2v1Mapping.get(a);
if (a == null) {
throw new UnsupportedOperationException("Unsupported V2 request: " + reqMap);
}
} else {
throw new UnsupportedOperationException("Only some CollectionAdminRequest-s are supported: " + req.getClass().getName() + ": " + req.getPath() + " " + req.getParams());
}
}
metricManager.registry("solr.node").counter("ADMIN." + req.getPath() + ".requests").inc();
if (a != null) {
CollectionParams.CollectionAction action = CollectionParams.CollectionAction.get(a);
if (action == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown action: " + a);
}
log.trace("Invoking Collection Action :{} with params {}", action.toLower(), req.getParams().toQueryString());
log.trace("Invoking Collection Action :{} with params {}", action.toLower(), params.toQueryString());
NamedList results = new NamedList();
rsp.setResponse(results);
incrementCount(action.name());
switch (action) {
case REQUESTSTATUS:
// we complete all async ops immediately
String requestId = req.getParams().get(REQUESTID);
String requestId = params.get(REQUESTID);
SimpleOrderedMap<String> status = new SimpleOrderedMap<>();
status.add("state", RequestStatusState.COMPLETED.getKey());
status.add("msg", "found [" + requestId + "] in completed tasks");
@ -820,21 +888,21 @@ public class SimCloudManager implements SolrCloudManager {
rsp.setResponse(results);
break;
case DELETESTATUS:
requestId = req.getParams().get(REQUESTID);
requestId = params.get(REQUESTID);
results.add("status", "successfully removed stored response for [" + requestId + "]");
results.add("success", "");
break;
case CREATE:
try {
clusterStateProvider.simCreateCollection(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simCreateCollection(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
case DELETE:
try {
clusterStateProvider.simDeleteCollection(req.getParams().get(CommonParams.NAME),
req.getParams().get(CommonAdminParams.ASYNC), results);
clusterStateProvider.simDeleteCollection(params.get(CommonParams.NAME),
params.get(CommonAdminParams.ASYNC), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
@ -844,21 +912,21 @@ public class SimCloudManager implements SolrCloudManager {
break;
case ADDREPLICA:
try {
clusterStateProvider.simAddReplica(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simAddReplica(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
case MOVEREPLICA:
try {
clusterStateProvider.simMoveReplica(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simMoveReplica(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
case OVERSEERSTATUS:
if (req.getParams().get(CommonAdminParams.ASYNC) != null) {
results.add(REQUESTID, req.getParams().get(CommonAdminParams.ASYNC));
if (params.get(CommonAdminParams.ASYNC) != null) {
results.add(REQUESTID, params.get(CommonAdminParams.ASYNC));
}
if (!liveNodesSet.get().isEmpty()) {
results.add("leader", liveNodesSet.get().iterator().next());
@ -869,34 +937,34 @@ public class SimCloudManager implements SolrCloudManager {
results.add("success", "");
break;
case ADDROLE:
nodeStateProvider.simSetNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
nodeStateProvider.simSetNodeValue(params.get("node"), "nodeRole", params.get("role"));
break;
case CREATESHARD:
try {
clusterStateProvider.simCreateShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simCreateShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
case SPLITSHARD:
try {
clusterStateProvider.simSplitShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simSplitShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
case DELETESHARD:
try {
clusterStateProvider.simDeleteShard(new ZkNodeProps(req.getParams().toNamedList().asMap(10)), results);
clusterStateProvider.simDeleteShard(new ZkNodeProps(params.toNamedList().asMap(10)), results);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
break;
default:
throw new UnsupportedOperationException("Unsupported collection admin action=" + action + " in request: " + req.getParams());
throw new UnsupportedOperationException("Unsupported collection admin action=" + action + " in request: " + params);
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "action is a required param in request: " + req.getParams());
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "action is a required param in request: " + params);
}
return rsp;
@ -970,7 +1038,7 @@ public class SimCloudManager implements SolrCloudManager {
// be forgiving of errors that occured as a result of interuption, even if
// the inner Callable didn't realize it...
if (Thread.currentThread().isInterrupted()) {
log.warn("Callable interupted w/o noticing", t);
log.warn("Callable interrupted w/o noticing", t);
throw t;
}
Throwable cause = t;

View File

@ -61,7 +61,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.cloud.ActionThrottle;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.api.collections.AddReplicaCmd;
import org.apache.solr.cloud.api.collections.Assign;
@ -94,7 +94,6 @@ import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.zookeeper.CreateMode;
import org.junit.Assert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -185,6 +184,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
// ============== SIMULATOR SETUP METHODS ====================
public void copyFrom(ClusterStateProvider other) throws Exception {
ClusterState state = other.getClusterState();
simSetClusterState(state);
clusterProperties.clear();
clusterProperties.putAll(other.getClusterProperties());
}
/**
* Initialize from an existing cluster state
* @param initialState initial cluster state
@ -561,11 +567,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
cores = 0;
}
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.CORES, cores + 1);
Integer disk = (Integer)values.get(ImplicitSnitch.DISK);
Number disk = (Number)values.get(ImplicitSnitch.DISK);
if (disk == null) {
disk = SimCloudManager.DEFAULT_FREE_DISK;
}
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk - 1);
long replicaSize = ((Number)replicaInfo.getVariable(Type.CORE_IDX.metricsAttribute)).longValue();
Number replicaSizeGB = (Number)Type.CORE_IDX.convertVal(replicaSize);
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk.doubleValue() - replicaSizeGB.doubleValue());
// fake metrics
String registry = SolrMetricManager.getRegistryName(SolrInfoBean.Group.core, replicaInfo.getCollection(),
replicaInfo.getShard(),
@ -573,8 +581,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
cloudManager.getMetricManager().registry(registry).counter("UPDATE./update.requests");
cloudManager.getMetricManager().registry(registry).counter("QUERY./select.requests");
cloudManager.getMetricManager().registerGauge(null, registry,
() -> ((Number)replicaInfo.getVariable(Type.CORE_IDX.metricsAttribute)).longValue(),
"", true, "INDEX.sizeInBytes");
() -> replicaSize, "", true, Type.CORE_IDX.metricsAttribute);
// at this point nuke our cached DocCollection state
collectionsStatesRef.set(null);
log.trace("-- simAddReplica {}", replicaInfo);
@ -616,11 +623,16 @@ public class SimClusterStateProvider implements ClusterStateProvider {
throw new Exception("Unexpected value of 'cores' (" + cores + ") on node: " + nodeId);
}
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.CORES, cores - 1);
Integer disk = (Integer)cloudManager.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.DISK);
if (disk == null || disk == 0) {
Number disk = (Number)cloudManager.getSimNodeStateProvider().simGetNodeValue(nodeId, ImplicitSnitch.DISK);
if (disk == null || disk.doubleValue() == 0.0) {
throw new Exception("Unexpected value of 'freedisk' (" + disk + ") on node: " + nodeId);
}
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk + 1);
if (ri.getVariable(Type.CORE_IDX.metricsAttribute) == null) {
throw new RuntimeException("Missing replica size: " + ri);
}
long replicaSize = ((Number)ri.getVariable(Type.CORE_IDX.metricsAttribute)).longValue();
Number replicaSizeGB = (Number)Type.CORE_IDX.convertVal(replicaSize);
cloudManager.getSimNodeStateProvider().simSetNodeValue(nodeId, ImplicitSnitch.DISK, disk.doubleValue() + replicaSizeGB.doubleValue());
}
log.trace("-- simRemoveReplica {}", ri);
simRunLeaderElection(ri.getCollection(), ri.getShard(), true);
@ -645,7 +657,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
try {
VersionedData oldData = stateManager.getData(ZkStateReader.CLUSTER_STATE);
int version = oldData != null ? oldData.getVersion() : 0;
Assert.assertEquals(clusterStateVersion, version);
assert clusterStateVersion == version : "local clusterStateVersion out of sync";
stateManager.setData(ZkStateReader.CLUSTER_STATE, data, version);
log.debug("** saved cluster state version " + (version));
clusterStateVersion++;
@ -919,7 +931,9 @@ public class SimClusterStateProvider implements ClusterStateProvider {
int numNrtReplicas = props.getInt(NRT_REPLICAS, props.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1));
int numPullReplicas = props.getInt(PULL_REPLICAS, 0);
int totalReplicas = shardNames.size() * (numNrtReplicas + numPullReplicas + numTlogReplicas);
Assert.assertEquals("unexpected number of replica positions", totalReplicas, replicaPositions.size());
if (totalReplicas != replicaPositions.size()) {
throw new RuntimeException("unexpected number of replica positions: expected " + totalReplicas + " but got " + replicaPositions.size());
}
final CountDownLatch finalStateLatch = new CountDownLatch(replicaPositions.size());
AtomicInteger replicaNum = new AtomicInteger(1);
replicaPositions.forEach(pos -> {
@ -1076,7 +1090,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
}
/**
* Move replica. This uses a similar algorithm as {@link org.apache.solr.cloud.api.collections.MoveReplicaCmd#moveNormalReplica(ClusterState, NamedList, String, String, DocCollection, Replica, Slice, int, boolean)}.
* Move replica. This uses a similar algorithm as {@link org.apache.solr.cloud.api.collections.MoveReplicaCmd} <code>moveNormalReplica(...)</code> method.
* @param message operation details
* @param results operation results.
*/
@ -1305,7 +1319,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
boolean success = false;
try {
CloudTestUtils.waitForState(cloudManager, collectionName, 30, TimeUnit.SECONDS, (liveNodes, state) -> {
CloudUtil.waitForState(cloudManager, collectionName, 30, TimeUnit.SECONDS, (liveNodes, state) -> {
for (String subSlice : subSlices) {
Slice s = state.getSlice(subSlice);
if (s.getLeader() == null) {
@ -1436,8 +1450,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
OverseerCollectionMessageHandler.NUM_SLICES, "1",
CommonAdminParams.WAIT_FOR_FINAL_STATE, "true");
simCreateCollection(props, new NamedList());
CloudTestUtils.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, Integer.parseInt(repFactor), false, true));
CloudUtil.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL, 120, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, Integer.parseInt(repFactor), false, true));
} catch (Exception e) {
throw new IOException(e);
}
@ -1980,6 +1994,30 @@ public class SimClusterStateProvider implements ClusterStateProvider {
}
}
public void simSetReplicaValues(String node, Map<String, Map<String, List<ReplicaInfo>>> source, boolean overwrite) {
List<ReplicaInfo> infos = nodeReplicaMap.get(node);
Map<String, ReplicaInfo> infoMap = new HashMap<>();
infos.forEach(ri -> infoMap.put(ri.getName(), ri));
if (infos == null) {
throw new RuntimeException("Node not present: " + node);
}
source.forEach((coll, shards) -> shards.forEach((shard, replicas) -> replicas.forEach(r -> {
ReplicaInfo target = infoMap.get(r.getName());
if (target == null) {
throw new RuntimeException("Unable to find simulated replica of " + r);
}
r.getVariables().forEach((k, v) -> {
if (target.getVariables().containsKey(k)) {
if (overwrite) {
target.getVariables().put(k, v);
}
} else {
target.getVariables().put(k, v);
}
});
})));
}
/**
* Return all replica infos for a node.
* @param node node id
@ -2043,7 +2081,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
ReplicaInfo ri = getReplicaInfo(s.getReplicas().iterator().next());
if (ri != null) {
Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
Number numBytes = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
if (numDocs != null) {
inactiveDocs.addValue(numDocs.doubleValue());
}
@ -2081,7 +2119,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
if (ri != null) {
Number numDocs = (Number)ri.getVariable("SEARCHER.searcher.numDocs");
Number delDocs = (Number)ri.getVariable("SEARCHER.searcher.deleteDocs");
Number numBytes = (Number)ri.getVariable("INDEX.sizeInBytes");
Number numBytes = (Number)ri.getVariable(Type.CORE_IDX.metricsAttribute);
if (numDocs != null) {
docs.addValue(numDocs.doubleValue());
}

View File

@ -136,7 +136,7 @@ public class SimDistribStateManager implements DistribStateManager {
public VersionedData getData(Watcher w) {
dataLock.lock();
try {
VersionedData res = new VersionedData(version, data, clientId);
VersionedData res = new VersionedData(version, data, mode, clientId);
if (w != null && !dataWatches.contains(w)) {
dataWatches.add(w);
}
@ -236,6 +236,32 @@ public class SimDistribStateManager implements DistribStateManager {
juteMaxbuffer = Integer.parseInt(bufferSize);
}
/**
* Copy all content from another DistribStateManager.
* @param other another state manager.
* @param failOnExists abort copy when one or more paths already exist (the state of this manager remains unchanged).
*/
public void copyFrom(DistribStateManager other, boolean failOnExists) throws InterruptedException, IOException, KeeperException, AlreadyExistsException, BadVersionException {
List<String> tree = other.listTree("/");
// check if any node exists
for (String path : tree) {
if (hasData(path) && failOnExists) {
throw new AlreadyExistsException(path);
}
}
for (String path : tree) {
VersionedData data = other.getData(path);
if (hasData(path)) {
setData(path, data.getData(), -1);
} else {
makePath(path, data.getData(), data.getMode(), failOnExists);
}
// hack: set the version to be the same as the source
Node n = traverse(path, false, CreateMode.PERSISTENT);
n.version = data.getVersion();
}
}
public SimDistribStateManager(ActionThrottle actionThrottle, ActionError actionError) {
this(null, actionThrottle, actionError);
}
@ -520,7 +546,7 @@ public class SimDistribStateManager implements DistribStateManager {
@Override
public void setData(String path, byte[] data, int version) throws NoSuchElementException, BadVersionException, IOException {
if (data.length > juteMaxbuffer) {
if (data != null && data.length > juteMaxbuffer) {
throw new IOException("Len error " + data.length);
}
multiLock.lock();

View File

@ -61,6 +61,7 @@ import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.NodeStateProvider;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.cloud.LeaderElector;
@ -136,7 +137,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
DEFAULT_NODE_GAUGES.add("CONTAINER.fs.coreRoot.usableSpace");
DEFAULT_CORE_GAUGES.add("INDEX.sizeInBytes");
DEFAULT_CORE_GAUGES.add(Variable.Type.CORE_IDX.metricsAttribute);
DEFAULT_CORE_COUNTERS.add("QUERY./select.requests");
DEFAULT_CORE_COUNTERS.add("UPDATE./update.requests");

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.solr.client.solrj.SolrClient;
@ -32,7 +33,6 @@ import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.Assert;
/**
* Simple mock client that collects added documents and supports simple search by id
@ -62,7 +62,7 @@ public class MockSearchableSolrClient extends SolrClient {
if (docList != null) {
docList.forEach(doc -> {
String id = (String) doc.getFieldValue("id");
Assert.assertNotNull(doc.toString(), id);
Objects.requireNonNull(id, doc.toString());
docs.computeIfAbsent(collection, c -> new LinkedHashMap<>()).put(id, doc);
});
}

View File

@ -101,11 +101,13 @@ import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.DistributedQueueFactory;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.autoscaling.Row;
import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
import org.apache.solr.client.solrj.cloud.autoscaling.Variable;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
@ -116,6 +118,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
@ -197,6 +200,9 @@ public class SolrCLI {
String excMsg = exc.getMessage();
if (excMsg != null) {
System.err.println("\nERROR: " + excMsg + "\n");
if (verbose) {
exc.printStackTrace(System.err);
}
toolExitStatus = 1;
} else {
throw exc;
@ -856,6 +862,8 @@ public class SolrCLI {
static final String NODE_REDACTION_PREFIX = "N_";
static final String COLL_REDACTION_PREFIX = "COLL_";
private boolean verbose;
public AutoscalingTool() {
this(System.out);
}
@ -903,6 +911,15 @@ public class SolrCLI {
OptionBuilder
.withDescription("Show summarized collection & node statistics.")
.create("stats"),
OptionBuilder
.withDescription("Simulate execution of all suggestions.")
.create("simulate"),
OptionBuilder
.withDescription("Max number of simulation iterations.")
.withArgName("NUMBER")
.hasArg()
.withLongOpt("iterations")
.create("i"),
OptionBuilder
.withDescription("Turn on all options to get all available information.")
.create("all")
@ -928,34 +945,24 @@ public class SolrCLI {
throw new UnsupportedOperationException("removeQueue");
}
};
try (SolrClientCloudManager clientCloudManager = new SolrClientCloudManager(dummmyFactory, cloudSolrClient)) {
try (SolrClientCloudManager realCloudManager = new SolrClientCloudManager(dummmyFactory, cloudSolrClient)) {
AutoScalingConfig config = null;
HashSet<String> liveNodes = new HashSet<>();
String configFile = cli.getOptionValue("a");
if (configFile != null) {
log.info("- reading autoscaling config from " + configFile);
if (verbose) {
log.info("- reading autoscaling config from " + configFile);
}
config = new AutoScalingConfig(IOUtils.toByteArray(new FileInputStream(configFile)));
} else {
log.info("- reading autoscaling config from the cluster.");
config = clientCloudManager.getDistribStateManager().getAutoScalingConfig();
if (verbose) {
log.info("- reading autoscaling config from the cluster.");
}
config = realCloudManager.getDistribStateManager().getAutoScalingConfig();
}
log.info("- calculating suggestions...");
long start = TimeSource.NANO_TIME.getTimeNs();
// collect live node names for optional redaction
liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, clientCloudManager);
long end = TimeSource.NANO_TIME.getTimeNs();
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
log.info("- calculating diagnostics...");
start = TimeSource.NANO_TIME.getTimeNs();
// update the live nodes
liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
Policy.Session session = config.getPolicy().createSession(clientCloudManager);
MapWriter mw = PolicyHelper.getDiagnostics(session);
Map<String, Object> diagnostics = new LinkedHashMap<>();
mw.toMap(diagnostics);
end = TimeSource.NANO_TIME.getTimeNs();
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
// freeze the cluster state
SimCloudManager cloudManager = SimCloudManager.createCluster(realCloudManager, TimeSource.get("simTime:50"));
liveNodes.addAll(cloudManager.getClusterStateProvider().getLiveNodes());
boolean withSuggestions = cli.hasOption("s");
boolean withDiagnostics = cli.hasOption("d") || cli.hasOption("n");
boolean withSortedNodes = cli.hasOption("n");
@ -987,141 +994,26 @@ public class SolrCLI {
}
// redact collection names too
Set<String> redactCollections = new HashSet<>();
ClusterState clusterState = clientCloudManager.getClusterStateProvider().getClusterState();
ClusterState clusterState = cloudManager.getClusterStateProvider().getClusterState();
clusterState.forEachCollection(coll -> redactCollections.add(coll.getName()));
if (!withSuggestions && !withDiagnostics) {
withSuggestions = true;
}
Map<String, Object> results = new LinkedHashMap<>();
if (withClusterState) {
Map<String, Object> map = new LinkedHashMap<>();
map.put("znodeVersion", clusterState.getZNodeVersion());
map.put("liveNodes", new TreeSet<>(clusterState.getLiveNodes()));
map.put("collections", clusterState.getCollectionsMap());
results.put("CLUSTERSTATE", map);
}
if (withStats) {
Map<String, Map<String, Number>> collStats = new TreeMap<>();
clusterState.forEachCollection(coll -> {
Map<String, Number> perColl = collStats.computeIfAbsent(coll.getName(), n -> new LinkedHashMap<>());
AtomicInteger numCores = new AtomicInteger();
HashMap<String, Map<String, AtomicInteger>> nodes = new HashMap<>();
coll.getSlices().forEach(s -> {
numCores.addAndGet(s.getReplicas().size());
s.getReplicas().forEach(r -> {
nodes.computeIfAbsent(r.getNodeName(), n -> new HashMap<>())
.computeIfAbsent(s.getName(), slice -> new AtomicInteger()).incrementAndGet();
});
});
int maxCoresPerNode = 0;
int minCoresPerNode = 0;
int maxActualShardsPerNode = 0;
int minActualShardsPerNode = 0;
int maxShardReplicasPerNode = 0;
int minShardReplicasPerNode = 0;
if (!nodes.isEmpty()) {
minCoresPerNode = Integer.MAX_VALUE;
minActualShardsPerNode = Integer.MAX_VALUE;
minShardReplicasPerNode = Integer.MAX_VALUE;
for (Map<String, AtomicInteger> counts : nodes.values()) {
int total = counts.values().stream().mapToInt(c -> c.get()).sum();
for (AtomicInteger count : counts.values()) {
if (count.get() > maxShardReplicasPerNode) {
maxShardReplicasPerNode = count.get();
}
if (count.get() < minShardReplicasPerNode) {
minShardReplicasPerNode = count.get();
}
}
if (total > maxCoresPerNode) {
maxCoresPerNode = total;
}
if (total < minCoresPerNode) {
minCoresPerNode = total;
}
if (counts.size() > maxActualShardsPerNode) {
maxActualShardsPerNode = counts.size();
}
if (counts.size() < minActualShardsPerNode) {
minActualShardsPerNode = counts.size();
}
}
}
perColl.put("activeShards", coll.getActiveSlices().size());
perColl.put("inactiveShards", coll.getSlices().size() - coll.getActiveSlices().size());
perColl.put("rf", coll.getReplicationFactor());
perColl.put("maxShardsPerNode", coll.getMaxShardsPerNode());
perColl.put("maxActualShardsPerNode", maxActualShardsPerNode);
perColl.put("minActualShardsPerNode", minActualShardsPerNode);
perColl.put("maxShardReplicasPerNode", maxShardReplicasPerNode);
perColl.put("minShardReplicasPerNode", minShardReplicasPerNode);
perColl.put("numCores", numCores.get());
perColl.put("numNodes", nodes.size());
perColl.put("maxCoresPerNode", maxCoresPerNode);
perColl.put("minCoresPerNode", minCoresPerNode);
});
Map<String, Map<String, Object>> nodeStats = new TreeMap<>();
Map<Integer, AtomicInteger> coreStats = new TreeMap<>();
for (Row row : session.getSortedNodes()) {
Map<String, Object> nodeStat = nodeStats.computeIfAbsent(row.node, n -> new LinkedHashMap<>());
nodeStat.put("isLive", row.isLive());
nodeStat.put("freedisk", row.getVal("freedisk", 0));
nodeStat.put("totaldisk", row.getVal("totaldisk", 0));
int cores = ((Number)row.getVal("cores", 0)).intValue();
nodeStat.put("cores", cores);
coreStats.computeIfAbsent(cores, num -> new AtomicInteger()).incrementAndGet();
Map<String, Map<String, Map<String, Object>>> collReplicas = new TreeMap<>();
row.forEachReplica(ri -> {
Map<String, Object> perReplica = collReplicas.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>())
.computeIfAbsent(ri.getCore().substring(ri.getCollection().length() + 1), core -> new LinkedHashMap<>());
perReplica.put("INDEX.sizeInGB", ri.getVariable("INDEX.sizeInGB"));
perReplica.put("coreNode", ri.getName());
if (ri.getBool("leader", false)) {
perReplica.put("leader", true);
Double totalSize = (Double)collStats.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
.computeIfAbsent("avgShardSize", size -> 0.0);
Number riSize = (Number)ri.getVariable("INDEX.sizeInGB");
if (riSize != null) {
totalSize += riSize.doubleValue();
collStats.get(ri.getCollection()).put("avgShardSize", totalSize);
Double max = (Double)collStats.get(ri.getCollection()).get("maxShardSize");
if (max == null) max = 0.0;
if (riSize.doubleValue() > max) {
collStats.get(ri.getCollection()).put("maxShardSize", riSize.doubleValue());
}
Double min = (Double)collStats.get(ri.getCollection()).get("minShardSize");
if (min == null) min = Double.MAX_VALUE;
if (riSize.doubleValue() < min) {
collStats.get(ri.getCollection()).put("minShardSize", riSize.doubleValue());
}
}
}
nodeStat.put("replicas", collReplicas);
});
Map<String, Object> results = prepareResults(cloudManager, config, withClusterState,
withStats, withSuggestions, withSortedNodes, withDiagnostics);
if (cli.hasOption("simulate")) {
String iterStr = cli.getOptionValue("i", "10");
int iterations;
try {
iterations = Integer.parseInt(iterStr);
} catch (Exception e) {
log.warn("Invalid option 'i' value, using default 10:" + e);
iterations = 10;
}
// calculate average per shard
for (Map<String, Number> perColl : collStats.values()) {
Double avg = (Double)perColl.get("avgShardSize");
if (avg != null) {
avg = avg / ((Number)perColl.get("activeShards")).doubleValue();
perColl.put("avgShardSize", avg);
}
}
Map<String, Object> stats = new LinkedHashMap<>();
results.put("STATISTICS", stats);
stats.put("coresPerNodes", coreStats);
stats.put("nodeStats", nodeStats);
stats.put("collectionStats", collStats);
}
if (withSuggestions) {
results.put("SUGGESTIONS", suggestions);
}
if (!withSortedNodes) {
diagnostics.remove("sortedNodes");
}
if (withDiagnostics) {
results.put("DIAGNOSTICS", diagnostics);
Map<String, Object> simulationResults = new HashMap<>();
simulate(cloudManager, config, simulationResults, withClusterState,
withStats, withSuggestions, withSortedNodes, withDiagnostics, iterations);
results.put("simulation", simulationResults);
}
String data = Utils.toJSONString(results);
if (redact) {
@ -1131,6 +1023,223 @@ public class SolrCLI {
stdout.println(data);
}
}
private Map<String, Object> prepareResults(SolrCloudManager clientCloudManager,
AutoScalingConfig config,
boolean withClusterState,
boolean withStats,
boolean withSuggestions,
boolean withSortedNodes,
boolean withDiagnostics) throws Exception {
Policy.Session session = config.getPolicy().createSession(clientCloudManager);
ClusterState clusterState = clientCloudManager.getClusterStateProvider().getClusterState();
if (verbose) {
log.info("- calculating suggestions...");
}
long start = TimeSource.NANO_TIME.getTimeNs();
List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, clientCloudManager);
long end = TimeSource.NANO_TIME.getTimeNs();
if (verbose) {
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
log.info("- calculating diagnostics...");
}
start = TimeSource.NANO_TIME.getTimeNs();
MapWriter mw = PolicyHelper.getDiagnostics(session);
Map<String, Object> diagnostics = new LinkedHashMap<>();
mw.toMap(diagnostics);
end = TimeSource.NANO_TIME.getTimeNs();
if (verbose) {
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
}
Map<String, Object> results = new LinkedHashMap<>();
if (withClusterState) {
Map<String, Object> map = new LinkedHashMap<>();
map.put("znodeVersion", clusterState.getZNodeVersion());
map.put("liveNodes", new TreeSet<>(clusterState.getLiveNodes()));
map.put("collections", clusterState.getCollectionsMap());
results.put("CLUSTERSTATE", map);
}
if (withStats) {
Map<String, Map<String, Number>> collStats = new TreeMap<>();
clusterState.forEachCollection(coll -> {
Map<String, Number> perColl = collStats.computeIfAbsent(coll.getName(), n -> new LinkedHashMap<>());
AtomicInteger numCores = new AtomicInteger();
HashMap<String, Map<String, AtomicInteger>> nodes = new HashMap<>();
coll.getSlices().forEach(s -> {
numCores.addAndGet(s.getReplicas().size());
s.getReplicas().forEach(r -> {
nodes.computeIfAbsent(r.getNodeName(), n -> new HashMap<>())
.computeIfAbsent(s.getName(), slice -> new AtomicInteger()).incrementAndGet();
});
});
int maxCoresPerNode = 0;
int minCoresPerNode = 0;
int maxActualShardsPerNode = 0;
int minActualShardsPerNode = 0;
int maxShardReplicasPerNode = 0;
int minShardReplicasPerNode = 0;
if (!nodes.isEmpty()) {
minCoresPerNode = Integer.MAX_VALUE;
minActualShardsPerNode = Integer.MAX_VALUE;
minShardReplicasPerNode = Integer.MAX_VALUE;
for (Map<String, AtomicInteger> counts : nodes.values()) {
int total = counts.values().stream().mapToInt(c -> c.get()).sum();
for (AtomicInteger count : counts.values()) {
if (count.get() > maxShardReplicasPerNode) {
maxShardReplicasPerNode = count.get();
}
if (count.get() < minShardReplicasPerNode) {
minShardReplicasPerNode = count.get();
}
}
if (total > maxCoresPerNode) {
maxCoresPerNode = total;
}
if (total < minCoresPerNode) {
minCoresPerNode = total;
}
if (counts.size() > maxActualShardsPerNode) {
maxActualShardsPerNode = counts.size();
}
if (counts.size() < minActualShardsPerNode) {
minActualShardsPerNode = counts.size();
}
}
}
perColl.put("activeShards", coll.getActiveSlices().size());
perColl.put("inactiveShards", coll.getSlices().size() - coll.getActiveSlices().size());
perColl.put("rf", coll.getReplicationFactor());
perColl.put("maxShardsPerNode", coll.getMaxShardsPerNode());
perColl.put("maxActualShardsPerNode", maxActualShardsPerNode);
perColl.put("minActualShardsPerNode", minActualShardsPerNode);
perColl.put("maxShardReplicasPerNode", maxShardReplicasPerNode);
perColl.put("minShardReplicasPerNode", minShardReplicasPerNode);
perColl.put("numCores", numCores.get());
perColl.put("numNodes", nodes.size());
perColl.put("maxCoresPerNode", maxCoresPerNode);
perColl.put("minCoresPerNode", minCoresPerNode);
});
Map<String, Map<String, Object>> nodeStats = new TreeMap<>();
Map<Integer, AtomicInteger> coreStats = new TreeMap<>();
for (Row row : session.getSortedNodes()) {
Map<String, Object> nodeStat = nodeStats.computeIfAbsent(row.node, n -> new LinkedHashMap<>());
nodeStat.put("isLive", row.isLive());
nodeStat.put("freedisk", row.getVal("freedisk", 0));
nodeStat.put("totaldisk", row.getVal("totaldisk", 0));
int cores = ((Number)row.getVal("cores", 0)).intValue();
nodeStat.put("cores", cores);
coreStats.computeIfAbsent(cores, num -> new AtomicInteger()).incrementAndGet();
Map<String, Map<String, Map<String, Object>>> collReplicas = new TreeMap<>();
row.forEachReplica(ri -> {
Map<String, Object> perReplica = collReplicas.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>())
.computeIfAbsent(ri.getCore().substring(ri.getCollection().length() + 1), core -> new LinkedHashMap<>());
// if (ri.getVariable(Variable.Type.CORE_IDX.tagName) != null) {
// perReplica.put(Variable.Type.CORE_IDX.tagName, ri.getVariable(Variable.Type.CORE_IDX.tagName));
// }
if (ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute) != null) {
perReplica.put(Variable.Type.CORE_IDX.metricsAttribute, ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute));
}
perReplica.put("coreNode", ri.getName());
if (ri.getBool("leader", false)) {
perReplica.put("leader", true);
Double totalSize = (Double)collStats.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
.computeIfAbsent("avgShardSize", size -> 0.0);
Number riSize = (Number)ri.getVariable(Variable.Type.CORE_IDX.metricsAttribute);
if (riSize != null) {
totalSize += riSize.doubleValue();
collStats.get(ri.getCollection()).put("avgShardSize", totalSize);
Double max = (Double)collStats.get(ri.getCollection()).get("maxShardSize");
if (max == null) max = 0.0;
if (riSize.doubleValue() > max) {
collStats.get(ri.getCollection()).put("maxShardSize", riSize.doubleValue());
}
Double min = (Double)collStats.get(ri.getCollection()).get("minShardSize");
if (min == null) min = Double.MAX_VALUE;
if (riSize.doubleValue() < min) {
collStats.get(ri.getCollection()).put("minShardSize", riSize.doubleValue());
}
}
}
nodeStat.put("replicas", collReplicas);
});
}
// calculate average per shard and convert the units
for (Map<String, Number> perColl : collStats.values()) {
Number avg = perColl.get("avgShardSize");
if (avg != null) {
avg = avg.doubleValue() / perColl.get("activeShards").doubleValue();
perColl.put("avgShardSize", (Number)Variable.Type.CORE_IDX.convertVal(avg));
}
Number num = perColl.get("maxShardSize");
if (num != null) {
perColl.put("maxShardSize", (Number)Variable.Type.CORE_IDX.convertVal(num));
}
num = perColl.get("minShardSize");
if (num != null) {
perColl.put("minShardSize", (Number)Variable.Type.CORE_IDX.convertVal(num));
}
}
Map<String, Object> stats = new LinkedHashMap<>();
results.put("STATISTICS", stats);
stats.put("coresPerNodes", coreStats);
stats.put("nodeStats", nodeStats);
stats.put("collectionStats", collStats);
}
if (withSuggestions) {
results.put("SUGGESTIONS", suggestions);
}
if (!withSortedNodes) {
diagnostics.remove("sortedNodes");
}
if (withDiagnostics) {
results.put("DIAGNOSTICS", diagnostics);
}
return results;
}
private void simulate(SimCloudManager simCloudManager,
AutoScalingConfig config,
Map<String, Object> results,
boolean withClusterState,
boolean withStats,
boolean withSuggestions,
boolean withSortedNodes,
boolean withDiagnostics, int iterations) throws Exception {
int loop = iterations;
List<Suggester.SuggestionInfo> suggestions = Collections.emptyList();
Map<String, Object> intermediate = new LinkedHashMap<>();
results.put("intermediateSuggestions", intermediate);
while (loop-- > 0) {
suggestions = PolicyHelper.getSuggestions(config, simCloudManager);
log.info("-- step " + (iterations - loop) + ", " + suggestions.size() + " suggestions.");
if (suggestions.isEmpty()) {
break;
}
intermediate.put("step" + (iterations - loop), suggestions);
int unresolvedCount = 0;
for (Suggester.SuggestionInfo suggestion : suggestions) {
SolrRequest operation = suggestion.getOperation();
if (operation == null) {
unresolvedCount++;
if (suggestion.getViolation() == null) {
log.info(" - ignoring suggestion without violation and without operation: " + suggestion);
}
continue;
}
simCloudManager.request(operation);
}
if (unresolvedCount == suggestions.size()) {
log.info("--- aborting simulation, only unresolved violations remain");
break;
}
}
if (loop == 0 && !suggestions.isEmpty()) {
System.err.println("### Failed to apply all suggestions in " + iterations + " steps. Remaining suggestions: " + suggestions);
}
results.put("finalState", prepareResults(simCloudManager, config, withClusterState, withStats,
withSuggestions, withSortedNodes, withDiagnostics));
}
}
/**
@ -3793,6 +3902,9 @@ public class SolrCLI {
String excMsg = exc.getMessage();
if (excMsg != null) {
System.err.println("\nERROR: " + excMsg + "\n");
if (verbose) {
exc.printStackTrace(System.err);
}
toolExitStatus = 100; // Exit >= 100 means error, else means number of tests that failed
} else {
throw exc;

View File

@ -18,17 +18,11 @@
package org.apache.solr.cloud;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrResponse;
@ -37,17 +31,8 @@ import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.request.RequestWriter.StringPayloadContentWriter;
import org.apache.solr.client.solrj.request.V2Request;
import org.apache.solr.client.solrj.response.SolrResponseBase;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.CollectionStatePredicate;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.util.TimeOut;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.junit.Assert;
import static org.apache.solr.common.params.CommonParams.JSON_MIME;
@ -57,139 +42,9 @@ import static org.apache.solr.common.params.CommonParams.JSON_MIME;
* Some useful methods for SolrCloud tests.
*/
public class CloudTestUtils {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final int DEFAULT_TIMEOUT = 90;
/**
* Wait for a particular collection state to appear.
*
* This is a convenience method using the {@link #DEFAULT_TIMEOUT}
*
* @param cloudManager current instance of {@link SolrCloudManager}
* @param message a message to report on failure
* @param collection the collection to watch
* @param predicate a predicate to match against the collection state
*/
public static long waitForState(final SolrCloudManager cloudManager,
final String message,
final String collection,
final CollectionStatePredicate predicate) {
AtomicReference<DocCollection> state = new AtomicReference<>();
AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
try {
return waitForState(cloudManager, collection, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> {
state.set(c);
liveNodesLastSeen.set(n);
return predicate.matches(n, c);
});
} catch (Exception e) {
throw new AssertionError(message + "\n" + "Live Nodes: " + liveNodesLastSeen.get() + "\nLast available state: " + state.get(), e);
}
}
/**
* Wait for a particular collection state to appear.
*
* This is a convenience method using the {@link #DEFAULT_TIMEOUT}
*
* @param cloudManager current instance of {@link SolrCloudManager}
* @param collection the collection to watch
* @param wait timeout value
* @param unit timeout unit
* @param predicate a predicate to match against the collection state
*/
public static long waitForState(final SolrCloudManager cloudManager,
final String collection,
long wait,
final TimeUnit unit,
final CollectionStatePredicate predicate) throws InterruptedException, TimeoutException, IOException {
TimeOut timeout = new TimeOut(wait, unit, cloudManager.getTimeSource());
long timeWarn = timeout.timeLeft(TimeUnit.MILLISECONDS) / 4;
ClusterState state = null;
DocCollection coll = null;
while (!timeout.hasTimedOut()) {
state = cloudManager.getClusterStateProvider().getClusterState();
coll = state.getCollectionOrNull(collection);
// due to the way we manage collections in SimClusterStateProvider a null here
// can mean that a collection is still being created but has no replicas
if (coll == null) { // does not yet exist?
timeout.sleep(100);
continue;
}
if (predicate.matches(state.getLiveNodes(), coll)) {
log.trace("-- predicate matched with state {}", state);
return timeout.timeElapsed(TimeUnit.MILLISECONDS);
}
timeout.sleep(100);
if (timeout.timeLeft(TimeUnit.MILLISECONDS) < timeWarn) {
log.trace("-- still not matching predicate: {}", state);
}
}
throw new TimeoutException("last ClusterState: " + state + ", last coll state: " + coll);
}
/**
* Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
* number of active shards and replicas
* @param expectedShards expected number of active shards
* @param expectedReplicas expected number of active replicas
*/
public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas) {
return clusterShape(expectedShards, expectedReplicas, false, false);
}
/**
* Return a {@link CollectionStatePredicate} that returns true if a collection has the expected
* number of shards and replicas.
* <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
* {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
* @param expectedShards expected number of shards
* @param expectedReplicas expected number of active replicas
* @param withInactive if true then count also inactive shards
* @param requireLeaders if true then require that each shard has a leader
*/
public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas, boolean withInactive,
boolean requireLeaders) {
return (liveNodes, collectionState) -> {
if (collectionState == null) {
log.info("-- null collection");
return false;
}
Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
if (slices.size() != expectedShards) {
log.info("-- wrong number of slices for collection {}, expected={}, found={}: {}", collectionState.getName(), expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
return false;
}
Set<String> leaderless = new HashSet<>();
for (Slice slice : slices) {
int activeReplicas = 0;
if (requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null) {
leaderless.add(slice.getName());
continue;
}
// skip other checks, we're going to fail anyway
if (!leaderless.isEmpty()) {
continue;
}
for (Replica replica : slice) {
if (replica.isActive(liveNodes))
activeReplicas++;
}
if (activeReplicas != expectedReplicas) {
log.info("-- wrong number of active replicas for collection {} in slice {}, expected={}, found={}", collectionState.getName(), slice.getName(), expectedReplicas, activeReplicas);
return false;
}
}
if (leaderless.isEmpty()) {
return true;
} else {
log.info("-- shards without leaders: {}", leaderless);
return false;
}
};
}
/**
* Wait for a particular named trigger to be scheduled.
* <p>

View File

@ -75,8 +75,8 @@ public class MetricsHistoryIntegrationTest extends SolrCloudTestCase {
// create .system
CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL, null, 1, 1)
.process(solrClient);
CloudTestUtils.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL,
30, TimeUnit.SECONDS, CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cloudManager, CollectionAdminParams.SYSTEM_COLL,
30, TimeUnit.SECONDS, CloudUtil.clusterShape(1, 1));
solrClient.query(CollectionAdminParams.SYSTEM_COLL, params(CommonParams.Q, "*:*"));
// sleep a little to allow the handler to collect some metrics
if (simulated) {

View File

@ -67,6 +67,7 @@ import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.junit.After;
@ -398,7 +399,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
if (data == null || data.length == 0) {
return null;
}
return new VersionedData(-1, data, "");
return new VersionedData(-1, data, CreateMode.PERSISTENT, "");
});
@ -478,7 +479,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
if (data == null || data.length == 0) {
return null;
}
return new VersionedData(-1, data, "");
return new VersionedData(-1, data, CreateMode.PERSISTENT, "");
});

View File

@ -133,7 +133,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
assertEquals(status.toString(), (long)NUM_DOCS, ((Number)status.get("inputDocs")).longValue());
assertEquals(status.toString(), (long)NUM_DOCS, ((Number)status.get("processedDocs")).longValue());
CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
return ReindexCollectionCmd.State.FINISHED == state;
});
@ -171,7 +171,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
}
assertNotNull("target collection not present after 30s", realTargetCollection);
CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", realTargetCollection, (liveNodes, coll) -> {
CloudUtil.waitForState(cloudManager, "did not finish copying in time", realTargetCollection, (liveNodes, coll) -> {
ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
return ReindexCollectionCmd.State.FINISHED == state;
});
@ -199,7 +199,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
.setConfigName("conf3");
req.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
return ReindexCollectionCmd.State.FINISHED == state;
});
@ -234,7 +234,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
.setCollectionParam("q", "id:10*");
req.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
CloudUtil.waitForState(cloudManager, "did not finish copying in time", targetCollection, (liveNodes, coll) -> {
ReindexCollectionCmd.State state = ReindexCollectionCmd.State.get(coll.getStr(ReindexCollectionCmd.REINDEXING_STATE));
return ReindexCollectionCmd.State.FINISHED == state;
});
@ -306,7 +306,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
assertFalse(coll.getName() + " still exists", coll.getName().startsWith(ReindexCollectionCmd.CHK_COL_PREFIX));
});
// verify that the source collection is read-write and has no reindexing flags
CloudTestUtils.waitForState(cloudManager, "collection state is incorrect", sourceCollection,
CloudUtil.waitForState(cloudManager, "collection state is incorrect", sourceCollection,
((liveNodes, collectionState) ->
!collectionState.isReadOnly() &&
collectionState.getStr(ReindexCollectionCmd.REINDEXING_STATE) == null &&
@ -324,7 +324,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
.setTarget(targetCollection);
String asyncId = req.processAsync(solrClient);
// wait for the source collection to be put in readOnly mode
CloudTestUtils.waitForState(cloudManager, "source collection didn't become readOnly",
CloudUtil.waitForState(cloudManager, "source collection didn't become readOnly",
sourceCollection, (liveNodes, coll) -> coll.isReadOnly());
req = CollectionAdminRequest.reindexCollection(sourceCollection);
@ -334,7 +334,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
assertNotNull(rsp.toString(), status);
assertEquals(status.toString(), "aborting", status.get("state"));
CloudTestUtils.waitForState(cloudManager, "incorrect collection state", sourceCollection,
CloudUtil.waitForState(cloudManager, "incorrect collection state", sourceCollection,
((liveNodes, collectionState) ->
collectionState.isReadOnly() &&
getState(sourceCollection) == ReindexCollectionCmd.State.ABORTED));
@ -347,7 +347,7 @@ public class ReindexCollectionTest extends SolrCloudTestCase {
assertEquals(status.toString(), "aborted", status.get("state"));
// let the process continue
TestInjection.reindexLatch.countDown();
CloudTestUtils.waitForState(cloudManager, "source collection is in wrong state",
CloudUtil.waitForState(cloudManager, "source collection is in wrong state",
sourceCollection, (liveNodes, docCollection) -> !docCollection.isReadOnly() && getState(sourceCollection) == null);
// verify the response
rsp = CollectionAdminRequest.requestStatus(asyncId).process(solrClient);

View File

@ -40,8 +40,8 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.common.SolrInputDocument;
@ -152,8 +152,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
if (SPEED == 1) {
cluster.waitForActiveCollection(collectionName, 2, 4);
} else {
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
}
long waitForSeconds = 3 + random().nextInt(5);
@ -258,8 +258,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
if (SPEED == 1) {
cluster.waitForActiveCollection(collectionName, 2, 4);
} else {
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
}
long waitForSeconds = 6 + random().nextInt(5);
@ -328,7 +328,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
boolean await = finished.await(60000, TimeUnit.MILLISECONDS);
assertTrue("did not finish processing in time", await);
CloudTestUtils.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS, CloudTestUtils.clusterShape(6, 2, true, true));
CloudUtil.waitForState(cloudManager, collectionName, 20, TimeUnit.SECONDS, CloudUtil.clusterShape(6, 2, true, true));
assertEquals(1, listenerEvents.size());
List<CapturedEvent> events = listenerEvents.get("capturing2");
assertNotNull("'capturing2' events not found", events);
@ -382,8 +382,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
if (SPEED == 1) {
cluster.waitForActiveCollection(collectionName, 2, 4);
} else {
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
}
for (int i = 0; i < 20; i++) {
@ -495,8 +495,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
"conf", 2, 2).setMaxShardsPerNode(2);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
for (int j = 0; j < 10; j++) {
UpdateRequest ureq = new UpdateRequest();
@ -621,8 +621,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
assertEquals(TriggerEventProcessorStage.SUCCEEDED, events.get(5).stage);
// collection should have 2 inactive and 4 active shards
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(6, 2, true, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(6, 2, true, true));
// check ops
List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>) events.get(4).event.getProperty(TriggerEvent.REQUESTED_OPS);
@ -759,8 +759,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
if (SPEED == 1) {
cluster.waitForActiveCollection(collectionName, 5, 10);
} else {
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(5, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(5, 2, false, true));
}
long waitForSeconds = 3 + random().nextInt(5);
@ -901,8 +901,8 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
"conf", 2, 2).setMaxShardsPerNode(2);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
CloudUtil.clusterShape(2, 2, false, true));
long waitForSeconds = 3 + random().nextInt(5);
Map<String, Object> props = createTriggerProps(waitForSeconds);

View File

@ -34,8 +34,8 @@ import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.common.cloud.ClusterState;
@ -206,8 +206,8 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
"conf", 1, 1);
create1.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "failed to create " + collection1, collection1,
CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cloudManager, "failed to create " + collection1, collection1,
CloudUtil.clusterShape(1, 1));
// also create a very stale lock
Map<String, Object> lockData = new HashMap<>();
@ -257,8 +257,8 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
CollectionAdminRequest.SplitShard split1 = CollectionAdminRequest.splitShard(collection1)
.setShardName("shard1");
split1.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "failed to split " + collection1, collection1,
CloudTestUtils.clusterShape(3, 1, true, true));
CloudUtil.waitForState(cloudManager, "failed to split " + collection1, collection1,
CloudUtil.clusterShape(3, 1, true, true));
await = triggerFired.await(90, TimeUnit.SECONDS);
@ -309,7 +309,7 @@ public class ScheduledMaintenanceTriggerTest extends SolrCloudTestCase {
ClusterState state = cloudManager.getClusterStateProvider().getClusterState();
CloudTestUtils.clusterShape(2, 1).matches(state.getLiveNodes(), state.getCollection(collection1));
CloudUtil.clusterShape(2, 1).matches(state.getLiveNodes(), state.getCollection(collection1));
}
public static CountDownLatch getTriggerFired() {

View File

@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.cloud.Replica;
@ -124,8 +125,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
"conf", 1, 2);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2));
CloudTestUtils.assertAutoScalingRequest
(cloudManager,
@ -276,8 +277,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
"conf", 1, 2);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2));
// add a couple of spare replicas above RF. Use different types.
// these additional replicas will be placed on other nodes in the cluster
@ -285,8 +286,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.TLOG));
solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.PULL));
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 5));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 5));
CloudTestUtils.assertAutoScalingRequest
(cloudManager,
@ -393,8 +394,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
assertEquals("cold replicas", 3, coldReplicas.get());
// now the collection should be down to RF = 2
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2));
listenerEvents.clear();
listenerEventLatch = new CountDownLatch(3);
@ -494,8 +495,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
assertEquals("coldNodes: " +ops.toString(), 2, coldNodes2.get());
// now the collection should be at RF == 1, with one additional PULL replica
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 1));
}
@Test
@ -507,8 +508,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
"conf", 1, 2);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2));
// add a couple of spare replicas above RF. Use different types to verify that only
// searchable replicas are considered
@ -517,8 +518,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.TLOG));
solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.PULL));
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 5));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 5));
CloudTestUtils.assertAutoScalingRequest
(cloudManager,
@ -687,8 +688,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
assertEquals(responses.toString(), 4, nodes.get());
// we are left with one searchable replica
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 1));
}
public static class CapturingTriggerListener extends TriggerListenerBase {

View File

@ -39,7 +39,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.SolrClientCloudManager;
import org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.ZkDistributedQueueFactory;
import org.apache.solr.common.cloud.SolrZkClient;
@ -106,8 +106,8 @@ public class SearchRateTriggerTest extends SolrCloudTestCase {
create.setMaxShardsPerNode(1);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 2));
CloudTestUtils.waitForState(cloudManager, COLL2, 60, TimeUnit.SECONDS, clusterShape(2, 2));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 2));
CloudUtil.waitForState(cloudManager, COLL2, 60, TimeUnit.SECONDS, clusterShape(2, 2));
double rate = 1.0;
URL baseUrl = targetNode.getBaseUrl();
@ -238,7 +238,7 @@ public class SearchRateTriggerTest extends SolrCloudTestCase {
"conf", 2, 2);
create.setMaxShardsPerNode(1);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 4));
CloudUtil.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS, clusterShape(2, 4));
long waitForSeconds = 5 + random().nextInt(5);
Map<String, Object> props = createTriggerProps(Arrays.asList(COLL1, COLL2), waitForSeconds, 1.0, 0.1);

View File

@ -26,6 +26,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Preference;
@ -72,6 +73,7 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
@BeforeClass
public static void setupCluster() throws Exception {
simulated = random().nextBoolean();
simulated = true;
log.info("####### Using simulated components? " + simulated);
configureCluster(NODE_COUNT)
@ -105,17 +107,17 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
if (simulated) {
// initialize simulated provider
SimCloudManager simCloudManager = new SimCloudManager(TimeSource.get("simTime:10"));
simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
nodeValues.forEach((n, values) -> {
try {
simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
} catch (InterruptedException e) {
fail("Interrupted:" + e);
}
});
simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
SimCloudManager simCloudManager = SimCloudManager.createCluster(realManager, TimeSource.get("simTime:10"));
// simCloudManager.getSimClusterStateProvider().simSetClusterProperties(clusterProperties);
// simCloudManager.getSimDistribStateManager().simSetAutoScalingConfig(autoScalingConfig);
// nodeValues.forEach((n, values) -> {
// try {
// simCloudManager.getSimNodeStateProvider().simSetNodeValues(n, values);
// } catch (InterruptedException e) {
// fail("Interrupted:" + e);
// }
// });
// simCloudManager.getSimClusterStateProvider().simSetClusterState(realState);
ClusterState simState = simCloudManager.getClusterStateProvider().getClusterState();
assertClusterStateEquals(realState, simState);
cloudManager = simCloudManager;
@ -137,12 +139,26 @@ public class TestSimClusterStateProvider extends SolrCloudTestCase {
for (Replica oneReplica : slice.getReplicas()) {
Replica twoReplica = sTwo.getReplica(oneReplica.getName());
assertNotNull(twoReplica);
assertEquals(oneReplica, twoReplica);
assertReplicaEquals(oneReplica, twoReplica);
}
});
});
}
private static void assertReplicaEquals(Replica one, Replica two) {
assertEquals(one.getName(), two.getName());
assertEquals(one.getNodeName(), two.getNodeName());
assertEquals(one.getState(), two.getState());
assertEquals(one.getType(), two.getType());
Map<String, Object> filteredPropsOne = one.getProperties().entrySet().stream()
.filter(e -> !(e.getKey().startsWith("INDEX") || e.getKey().startsWith("QUERY") || e.getKey().startsWith("UPDATE")))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
Map<String, Object> filteredPropsTwo = two.getProperties().entrySet().stream()
.filter(e -> !(e.getKey().startsWith("INDEX") || e.getKey().startsWith("QUERY") || e.getKey().startsWith("UPDATE")))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
assertEquals(filteredPropsOne, filteredPropsTwo);
}
private String addNode() throws Exception {
JettySolrRunner solr = cluster.startJettySolrRunner();
cluster.waitForAllNodes(30);

View File

@ -33,8 +33,8 @@ import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.ComputePlanAction;
import org.apache.solr.cloud.autoscaling.ScheduledTriggers;
@ -145,8 +145,8 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
"conf",1, 2);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
"testNodeLost", CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
"testNodeLost", CloudUtil.clusterShape(1, 2, false, true));
ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
log.debug("-- cluster state: {}", clusterState);
@ -211,8 +211,8 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
create.setMaxShardsPerNode(2);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
"testNodeWithMultipleReplicasLost", CloudTestUtils.clusterShape(2, 3, false, true));
CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
"testNodeWithMultipleReplicasLost", CloudUtil.clusterShape(2, 3, false, true));
ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
log.debug("-- cluster state: {}", clusterState);
@ -296,7 +296,7 @@ public class TestSimComputePlanAction extends SimSolrCloudTestCase {
"conf",1, 4).setMaxShardsPerNode(-1);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
"testNodeAdded", (liveNodes, collectionState) -> collectionState.getReplicas().stream().allMatch(replica -> replica.isActive(liveNodes)));
// reset to the original policy which has only 1 replica per shard per node

View File

@ -31,8 +31,8 @@ import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
import org.apache.solr.cloud.autoscaling.NodeLostTrigger;
@ -90,8 +90,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
create.setMaxShardsPerNode(1);
create.process(solrClient);
log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
log.info("Collection ready after " + CloudUtil.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2, false, true)) + "ms");
String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
@ -152,8 +152,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
assertNotNull(response.get("success"));
}
log.info("Collection ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 2, false, true)) + "ms");
log.info("Collection ready after " + CloudUtil.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 2, false, true)) + "ms");
}
@Test
@ -181,8 +181,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
create.setMaxShardsPerNode(1);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "Timed out waiting for replicas of new collection to be active",
collectionName, CloudUtil.clusterShape(1, 2, false, true));
String sourceNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
ClusterState clusterState = cluster.getClusterStateProvider().getClusterState();
@ -200,8 +200,8 @@ public class TestSimExecutePlanAction extends SimSolrCloudTestCase {
cluster.getTimeSource().sleep(3000);
CloudTestUtils.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
collectionName, CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "Timed out waiting for replicas of collection to be 2 again",
collectionName, CloudUtil.clusterShape(1, 2, false, true));
clusterState = cluster.getClusterStateProvider().getClusterState();
docCollection = clusterState.getCollection(collectionName);

View File

@ -26,8 +26,8 @@ import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
@ -101,9 +101,9 @@ public class TestSimExtremeIndexing extends SimSolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
"conf", 2, 2).setMaxShardsPerNode(10);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, collectionName, 90, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(2, 2, false, true));
CloudUtil.waitForState(cluster, collectionName, 90, TimeUnit.SECONDS,
CloudUtil.clusterShape(2, 2, false, true));
//long waitForSeconds = 3 + random().nextInt(5);
long waitForSeconds = 1;

View File

@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.CapturedEvent;
import org.apache.solr.cloud.autoscaling.ComputePlanAction;
@ -194,8 +195,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
create.setCreateNodeSet(String.join(",", nodes));
create.process(solrClient);
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
CloudUtil.clusterShape(5, 15, false, true)) + "ms");
int KILL_NODES = 8;
// kill off a number of nodes
@ -203,8 +204,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
cluster.simRemoveNode(nodes.get(i), false);
}
// should fully recover
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 90 * KILL_NODES, TimeUnit.SECONDS,
CloudUtil.clusterShape(5, 15, false, true)) + "ms");
log.info("OP COUNTS: " + cluster.simGetOpCounts());
long moveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
@ -237,8 +238,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
fail("did not finish processing all events in time: started=" + triggerStartedCount.get() + ", finished=" + triggerFinishedCount.get());
}
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
CloudTestUtils.clusterShape(5, 15, false, true)) + "ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 30 * nodes.size(), TimeUnit.SECONDS,
CloudUtil.clusterShape(5, 15, false, true)) + "ms");
long newMoveReplicaOps = cluster.simGetOpCount(CollectionParams.CollectionAction.MOVEREPLICA.name());
log.info("==== Flaky replicas: {}. Additional MOVEREPLICA count: {}", flakyReplicas, (newMoveReplicaOps - moveReplicaOps));
// flaky nodes lead to a number of MOVEREPLICA that is non-zero but lower than the number of flaky replicas
@ -274,8 +275,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
// Since our current goal is to try and find situations where cores are just flat out missing
// no matter how long we wait, let's be excessive and generous in our timeout.
// (REMINDER: this uses the cluster's timesource, and ADDREPLICA has a hardcoded delay of 500ms)
CloudTestUtils.waitForState(cluster, name, 2 * totalCores, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(numShards, repsPerShard, false, true));
CloudUtil.waitForState(cluster, name, 2 * totalCores, TimeUnit.SECONDS,
CloudUtil.clusterShape(numShards, repsPerShard, false, true));
final CollectionAdminRequest.Delete delete = CollectionAdminRequest.deleteCollection(name);
log.info("DELETE: {}", delete);
@ -311,8 +312,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
create.setAutoAddReplicas(false);
create.process(solrClient);
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 90 * NUM_NODES, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 90 * NUM_NODES, TimeUnit.SECONDS,
CloudUtil.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
// start adding nodes
int numAddNode = NUM_NODES / 5;
@ -361,8 +362,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
log.info("1st check: lastNumOps (MOVEREPLICA) = {}", lastNumOps);
assertTrue("no MOVEREPLICA ops?", lastNumOps > 0);
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
CloudUtil.clusterShape(NUM_NODES / 10, NUM_NODES / 8 * 3, false, true)) + " ms");
int count = 1000;
SolrInputDocument finishedEvent = null;
@ -520,8 +521,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
create.setAutoAddReplicas(false);
create.process(solrClient);
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 60 * NUM_NODES, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 60 * NUM_NODES, TimeUnit.SECONDS,
CloudUtil.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
// start killing nodes
int numNodes = NUM_NODES / 5;
@ -604,8 +605,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
if (listenerEvents.isEmpty()) {
// no failed movements - verify collection shape
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 20 * NUM_NODES, TimeUnit.SECONDS,
CloudUtil.clusterShape(NUM_NODES / 5, NUM_NODES / 10, false, true)) + " ms");
} else {
cluster.getTimeSource().sleep(NUM_NODES * 100);
}
@ -663,8 +664,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
"conf", 2, 10);
create.process(solrClient);
log.info("Ready after " + CloudTestUtils.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(2, 10, false, true)) + " ms");
log.info("Ready after " + CloudUtil.waitForState(cluster, collectionName, 300, TimeUnit.SECONDS,
CloudUtil.clusterShape(2, 10, false, true)) + " ms");
// collect the node names for shard1
Set<String> nodes = new HashSet<>();

View File

@ -36,8 +36,8 @@ import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.Row;
import org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkStateReader;
@ -75,8 +75,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.createCollection("perReplicaDataColl", "conf", 1, 5)
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "perReplicaDataColl",
CloudTestUtils.clusterShape(1, 5, false, true));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "perReplicaDataColl",
CloudUtil.clusterShape(1, 5, false, true));
DocCollection coll = getCollectionState("perReplicaDataColl");
String autoScaleJson = "{" +
" 'cluster-preferences': [" +
@ -126,13 +126,13 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1)
.setPolicy("c1")
.process(solrClient);
CloudTestUtils.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
CloudTestUtils.clusterShape(1, 1, false, true));
CloudUtil.waitForState(cluster, collectionName, 120, TimeUnit.SECONDS,
CloudUtil.clusterShape(1, 1, false, true));
getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1").process(solrClient);
CloudTestUtils.waitForState(cluster,
CloudUtil.waitForState(cluster,
collectionName, 120l, TimeUnit.SECONDS,
(liveNodes, collectionState) -> collectionState.getReplicas().size() == 2);
@ -160,8 +160,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
.setPolicy("c1")
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", collectionName,
CloudUtil.clusterShape(1, 2, false, true));
DocCollection docCollection = getCollectionState(collectionName);
List<Replica> list = docCollection.getReplicas(firstNode);
@ -174,7 +174,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.splitShard(collectionName).setShardName("shard1").process(solrClient);
CloudTestUtils.waitForState(cluster, "Timed out waiting to see 6 replicas for collection: " + collectionName,
CloudUtil.waitForState(cluster, "Timed out waiting to see 6 replicas for collection: " + collectionName,
collectionName, (liveNodes, collectionState) -> collectionState.getReplicas().size() == 6);
docCollection = getCollectionState(collectionName);
@ -219,8 +219,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
//org.eclipse.jetty.server.handler.DefaultHandler.2xx-responses
CollectionAdminRequest.createCollection("metricsTest", "conf", 1, 1)
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "metricsTest",
CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "metricsTest",
CloudUtil.clusterShape(1, 1));
DocCollection collection = getCollectionState("metricsTest");
List<String> tags = Arrays.asList("metrics:solr.node:ADMIN./admin/authorization.clientErrors:count",
@ -266,8 +266,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1", 1, 1, 1)
.setMaxShardsPerNode(-1)
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudTestUtils.clusterShape(1, 3, false, true));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudUtil.clusterShape(1, 3, false, true));
DocCollection coll = getCollectionState("policiesTest");
@ -311,16 +311,16 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "s1,s2", 1)
.setPolicy("c1")
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudTestUtils.clusterShape(2, 1));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudUtil.clusterShape(2, 1));
DocCollection coll = getCollectionState("policiesTest");
assertEquals("c1", coll.getPolicyName());
assertEquals(2,coll.getReplicas().size());
coll.forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
CollectionAdminRequest.createShard("policiesTest", "s3").process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudTestUtils.clusterShape(3, 1));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudUtil.clusterShape(3, 1));
coll = getCollectionState("policiesTest");
assertEquals(1, coll.getSlice("s3").getReplicas().size());
@ -331,8 +331,8 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
SolrClient solrClient = cluster.simGetSolrClient();
CollectionAdminRequest.createCollectionWithImplicitRouter("policiesTest", "conf", "shard1", 2)
.process(solrClient);
CloudTestUtils.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "Timeout waiting for collection to become active", "policiesTest",
CloudUtil.clusterShape(1, 2, false, true));
DocCollection rulesCollection = getCollectionState("policiesTest");
Map<String, Object> val = cluster.getNodeStateProvider().getNodeValues(rulesCollection.getReplicas().get(0).getNodeName(), Arrays.asList(

View File

@ -42,6 +42,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.autoscaling.ActionContext;
import org.apache.solr.cloud.autoscaling.AutoScaling;
import org.apache.solr.cloud.autoscaling.CapturedEvent;
@ -1343,8 +1344,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
"conf", 1, 2);
create.process(solrClient);
CloudTestUtils.waitForState(cluster, "searchRate testing collection creating",
COLL1, CloudTestUtils.clusterShape(1, 2, false, true));
CloudUtil.waitForState(cluster, "searchRate testing collection creating",
COLL1, CloudUtil.clusterShape(1, 2, false, true));
listenerEventLatch = new CountDownLatch(4);

View File

@ -25,7 +25,7 @@ import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.CloudTestUtils;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.common.params.CollectionAdminParams;
@ -96,8 +96,8 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(CollectionAdminParams.SYSTEM_COLL,
"conf", 1, 1);
create.process(solrClient);
CloudTestUtils.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
CollectionAdminParams.SYSTEM_COLL, CloudTestUtils.clusterShape(1, 1));
CloudUtil.waitForState(cloudManager, "failed to create " + CollectionAdminParams.SYSTEM_COLL,
CollectionAdminParams.SYSTEM_COLL, CloudUtil.clusterShape(1, 1));
}
@AfterClass

View File

@ -447,3 +447,30 @@ Since there is no node that can host a replica for `shard2` without causing a vi
[ {"cores": "<3", "node": "#ANY"} ]
After re-issuing the `SecondCollection` CREATE command, the replica for `shard1` will be placed on "nodeA": it's least loaded, so is tested first, and no policy violation will result from placement there. The `shard2` replica could be placed on any of the 3 nodes, since they're all equally loaded, and the chosen node will remain below its maximum core count after placement. The CREATE command succeeds.
== Testing the autoscaling configuration and suggestions
It's not always easy to predict the impact of autoscaling configuration changes on the
cluster layout. Starting with release 8.1 Solr provides a tool for assessing the impact of
such changes without affecting the state of the target cluster.
This testing tool is a part of `bin/solr autoscaling` command. In addition to other
options that provide detailed status of the current cluster layout the following options
specifically allow users to test new autoscaling configurations and run "what if" scenarios:
`-a <CONFIG>`::
JSON file containing autoscaling configuration to test. This file needs to be in the same
format as the result of the `/solr/admin/autoscaling` call. If this parameter is missing then the
currently deployed autoscaling configuration is used.
`-simulate`::
Simulate the effects of applying all autoscaling suggestions on the cluster layout. NOTE: this does not
affect in any way the actual cluster - this option uses the simulation framework to calculate the
new layout without actually making the changes. Calculations are performed in the tool's JVM so they don't
affect the performance of the running cluster either. This process is repeated several times until a limit
is reached or there are no more suggestions left to apply (although unresolved violations may still remain!)
`-i <NUMBER>`::
Number of iterations of the simulation loop. Default is 10.
Results of the simulation contain the initial suggestions, suggestions at each step of the
simulation and the final simulated state of the cluster.

View File

@ -103,7 +103,7 @@ public interface DistribStateManager extends SolrCloseable {
}
List<String> children = listData(node);
for (final String child : children) {
final String childPath = node + "/" + child;
final String childPath = node + (node.equals("/") ? "" : "/") + child;
queue.add(childPath);
tree.add(childPath);
}

View File

@ -16,6 +16,8 @@
*/
package org.apache.solr.client.solrj.cloud.autoscaling;
import org.apache.zookeeper.CreateMode;
/**
* Immutable representation of binary data with version.
*/
@ -23,16 +25,19 @@ public class VersionedData {
private final int version;
private final byte[] data;
private final String owner;
private final CreateMode mode;
/**
* Constructor.
* @param version version of the data, or -1 if unknown
* @param data binary data, or null.
* @param mode create mode
* @param owner symbolic identifier of data owner / creator, or null.
*/
public VersionedData(int version, byte[] data, String owner) {
public VersionedData(int version, byte[] data, CreateMode mode, String owner) {
this.version = version;
this.data = data;
this.mode = mode;
this.owner = owner;
}
@ -44,6 +49,10 @@ public class VersionedData {
return data;
}
public CreateMode getMode() {
return mode;
}
public String getOwner() {
return owner;
}

View File

@ -85,7 +85,9 @@ public class ZkDistribStateManager implements DistribStateManager {
Stat stat = new Stat();
try {
byte[] bytes = zkClient.getData(path, watcher, stat, true);
return new VersionedData(stat.getVersion(), bytes, String.valueOf(stat.getEphemeralOwner()));
return new VersionedData(stat.getVersion(), bytes,
stat.getEphemeralOwner() != 0 ? CreateMode.EPHEMERAL : CreateMode.PERSISTENT,
String.valueOf(stat.getEphemeralOwner()));
} catch (KeeperException.NoNodeException e) {
throw new NoSuchElementException(path);
} catch (InterruptedException e) {