SOLR-14486: Autoscaling simulation framework should stop using /clusterstate.json.

This commit is contained in:
Andrzej Bialecki 2020-05-19 18:52:47 +02:00
parent 44fc5b989a
commit 57b7d8a8db
5 changed files with 207 additions and 118 deletions

View File

@ -71,6 +71,9 @@ Other Changes
If you have security concerns or other reasons to disable the Admin UI, you can modify `SOLR_ADMIN_UI_DISABLED` If you have security concerns or other reasons to disable the Admin UI, you can modify `SOLR_ADMIN_UI_DISABLED`
`solr.in.sh`/`solr.in.cmd` at start. (marcussorealheis) `solr.in.sh`/`solr.in.cmd` at start. (marcussorealheis)
* SOLR-14486: Autoscaling simulation framework no longer creates /clusterstate.json (format 1),
instead it creates individual per-collection /state.json files (format 2). (ab)
================== 8.6.0 ================== ================== 8.6.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -181,7 +181,6 @@ public class SimCloudManager implements SolrCloudManager {
if (distribStateManager == null) { if (distribStateManager == null) {
this.stateManager = new SimDistribStateManager(SimDistribStateManager.createNewRootNode()); this.stateManager = new SimDistribStateManager(SimDistribStateManager.createNewRootNode());
// init common paths // init common paths
stateManager.makePath(ZkStateReader.CLUSTER_STATE);
stateManager.makePath(ZkStateReader.CLUSTER_PROPS); stateManager.makePath(ZkStateReader.CLUSTER_PROPS);
stateManager.makePath(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH); stateManager.makePath(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH);
stateManager.makePath(ZkStateReader.LIVE_NODES_ZKNODE); stateManager.makePath(ZkStateReader.LIVE_NODES_ZKNODE);

View File

@ -48,7 +48,9 @@ import java.util.stream.Collectors;
import com.google.common.util.concurrent.AtomicDouble; import com.google.common.util.concurrent.AtomicDouble;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy; import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo; import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
@ -98,6 +100,7 @@ import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.update.SolrIndexSplitter; import org.apache.solr.update.SolrIndexSplitter;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -119,8 +122,8 @@ import static org.apache.solr.common.params.CommonParams.NAME;
* <li>using autoscaling policy for replica placements</li> * <li>using autoscaling policy for replica placements</li>
* <li>maintaining and up-to-date list of /live_nodes and nodeAdded / nodeLost markers</li> * <li>maintaining and up-to-date list of /live_nodes and nodeAdded / nodeLost markers</li>
* <li>running a simulated leader election on collection changes (with throttling), when needed</li> * <li>running a simulated leader election on collection changes (with throttling), when needed</li>
* <li>maintaining an up-to-date /clusterstate.json (single file format), which also tracks replica states, * <li>maintaining an up-to-date /state.json per-collection files, which also track replica states,
* leader election changes, replica property changes, etc. Note: this file is only written, * leader election changes, replica property changes, etc. Note: these files are only written,
* but never read by the framework!</li> * but never read by the framework!</li>
* <li>maintaining an up-to-date /clusterprops.json. Note: this file is only written, but never read by the * <li>maintaining an up-to-date /clusterprops.json. Note: this file is only written, but never read by the
* framework!</li> * framework!</li>
@ -153,12 +156,131 @@ public class SimClusterStateProvider implements ClusterStateProvider {
private final Map<String, Map<String, Long>> opDelays = new ConcurrentHashMap<>(); private final Map<String, Map<String, Long>> opDelays = new ConcurrentHashMap<>();
private volatile int clusterStateVersion = 0;
private volatile String overseerLeader = null; private volatile String overseerLeader = null;
private volatile Map<String, Object> lastSavedProperties = null; private volatile Map<String, Object> lastSavedProperties = null;
private final AtomicReference<Map<String, DocCollection>> collectionsStatesRef = new AtomicReference<>(); private class CachedCollectionRef {
private final String name;
private int zkVersion;
private DocCollection coll;
ReentrantLock lock = new ReentrantLock();
CachedCollectionRef(String name, int zkVersion) {
this.name = name;
this.zkVersion = zkVersion;
}
public DocCollection getColl() throws InterruptedException, IOException {
DocCollection dc = coll;
if (dc != null) {
return dc;
}
lock.lock();
try {
if (coll != null) {
return coll;
} else {
Map<String, Map<String, Map<String, Replica>>> collMap = new HashMap<>();
nodeReplicaMap.forEach((n, replicas) -> {
synchronized (replicas) {
replicas.forEach(ri -> {
if (!ri.getCollection().equals(name)) {
return;
}
Map<String, Object> props;
synchronized (ri) {
props = new HashMap<>(ri.getVariables());
}
props.put(ZkStateReader.NODE_NAME_PROP, n);
props.put(ZkStateReader.CORE_NAME_PROP, ri.getCore());
props.put(ZkStateReader.REPLICA_TYPE, ri.getType().toString());
props.put(ZkStateReader.STATE_PROP, ri.getState().toString());
Replica r = new Replica(ri.getName(), props, ri.getCollection(), ri.getShard());
collMap.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
.computeIfAbsent(ri.getShard(), s -> new HashMap<>())
.put(ri.getName(), r);
});
}
});
// add empty slices
sliceProperties.forEach((c, perSliceProps) -> {
if (!c.equals(name)) {
return;
}
perSliceProps.forEach((slice, props) -> {
collMap.computeIfAbsent(c, co -> new ConcurrentHashMap<>()).computeIfAbsent(slice, s -> new ConcurrentHashMap<>());
});
});
// add empty collections
collProperties.keySet().forEach(c -> {
if (!c.equals(name)) {
return;
}
collMap.computeIfAbsent(c, co -> new ConcurrentHashMap<>());
});
Map<String, Map<String, Replica>> shards = collMap.get(name);
Map<String, Slice> slices = new HashMap<>();
shards.forEach((s, replicas) -> {
Map<String, Object> sliceProps = sliceProperties.computeIfAbsent(name, c -> new ConcurrentHashMap<>()).computeIfAbsent(s, sl -> new ConcurrentHashMap<>());
Slice slice = new Slice(s, replicas, sliceProps, name);
slices.put(s, slice);
});
Map<String, Object> collProps = collProperties.computeIfAbsent(name, c -> new ConcurrentHashMap<>());
Map<String, Object> routerProp = (Map<String, Object>) collProps.getOrDefault(DocCollection.DOC_ROUTER, Collections.singletonMap("name", DocRouter.DEFAULT_NAME));
DocRouter router = DocRouter.getDocRouter((String)routerProp.getOrDefault("name", DocRouter.DEFAULT_NAME));
String path = ZkStateReader.getCollectionPath(name);
coll = new DocCollection(name, slices, collProps, router, zkVersion + 1, path);
try {
SimDistribStateManager stateManager = cloudManager.getSimDistribStateManager();
byte[] data = Utils.toJSON(Collections.singletonMap(name, coll));
if (!stateManager.hasData(path)) {
try {
stateManager.makePath(path, data, CreateMode.PERSISTENT, true);
} catch (AlreadyExistsException e) {
// try updating
stateManager.setData(path, data, zkVersion);
}
} else {
stateManager.setData(path, data, zkVersion);
}
// verify version
VersionedData vd = stateManager.getData(path);
assert vd.getVersion() == zkVersion + 1;
zkVersion++;
} catch (KeeperException | BadVersionException e) {
// should never happen?
throw new RuntimeException("error saving " + coll, e);
}
}
} finally {
lock.unlock();
}
return coll;
}
public int getZkVersion() {
lock.lock();
try {
return zkVersion;
} finally {
lock.unlock();
}
}
public void invalidate() {
lock.lock();
try {
coll = null;
} finally {
lock.unlock();
}
}
}
private final Map<String, CachedCollectionRef> collectionsStatesRef = new ConcurrentHashMap<>();
private final Random bulkUpdateRandom = new Random(0); private final Random bulkUpdateRandom = new Random(0);
@ -207,6 +329,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
sliceProperties.clear(); sliceProperties.clear();
nodeReplicaMap.clear(); nodeReplicaMap.clear();
liveNodes.clear(); liveNodes.clear();
collectionsStatesRef.clear();
for (String nodeId : stateManager.listData(ZkStateReader.LIVE_NODES_ZKNODE)) { for (String nodeId : stateManager.listData(ZkStateReader.LIVE_NODES_ZKNODE)) {
if (stateManager.hasData(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeId)) { if (stateManager.hasData(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeId)) {
stateManager.removeData(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeId, -1); stateManager.removeData(ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeId, -1);
@ -223,6 +346,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
createEphemeralLiveNode(nodeId); createEphemeralLiveNode(nodeId);
} }
initialState.forEachCollection(dc -> { initialState.forEachCollection(dc -> {
// DocCollection will be created later
collectionsStatesRef.put(dc.getName(), new CachedCollectionRef(dc.getName(), dc.getZNodeVersion()));
collProperties.computeIfAbsent(dc.getName(), name -> new ConcurrentHashMap<>()).putAll(dc.getProperties()); collProperties.computeIfAbsent(dc.getName(), name -> new ConcurrentHashMap<>()).putAll(dc.getProperties());
opDelays.computeIfAbsent(dc.getName(), Utils.NEW_HASHMAP_FUN).putAll(defaultOpDelays); opDelays.computeIfAbsent(dc.getName(), Utils.NEW_HASHMAP_FUN).putAll(defaultOpDelays);
dc.getSlices().forEach(s -> { dc.getSlices().forEach(s -> {
@ -248,7 +373,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
}); });
}); });
}); });
collectionsStatesRef.set(null);
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -287,8 +411,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
return nodes.get(random.nextInt(nodes.size())); return nodes.get(random.nextInt(nodes.size()));
} }
// todo: maybe hook up DistribStateManager /clusterstate.json watchers?
private ReplicaInfo getReplicaInfo(Replica r) { private ReplicaInfo getReplicaInfo(Replica r) {
final List<ReplicaInfo> list = nodeReplicaMap.computeIfAbsent final List<ReplicaInfo> list = nodeReplicaMap.computeIfAbsent
(r.getNodeName(), Utils.NEW_SYNCHRONIZED_ARRAYLIST_FUN); (r.getNodeName(), Utils.NEW_SYNCHRONIZED_ARRAYLIST_FUN);
@ -331,8 +453,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
// mark every replica on that node as down // mark every replica on that node as down
boolean res = liveNodes.remove(nodeId); boolean res = liveNodes.remove(nodeId);
setReplicaStates(nodeId, Replica.State.DOWN, collections); setReplicaStates(nodeId, Replica.State.DOWN, collections);
if (!collections.isEmpty()) { for (String collection : collections) {
collectionsStatesRef.set(null); collectionsStatesRef.get(collection).invalidate();;
} }
// remove ephemeral nodes // remove ephemeral nodes
stateManager.getRoot().removeEphemeralChildren(nodeId); stateManager.getRoot().removeEphemeralChildren(nodeId);
@ -363,7 +485,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
try { try {
Set<String> myNodes = new HashSet<>(nodeReplicaMap.keySet()); Set<String> myNodes = new HashSet<>(nodeReplicaMap.keySet());
myNodes.removeAll(liveNodes.get()); myNodes.removeAll(liveNodes.get());
collectionsStatesRef.set(null);
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -452,7 +573,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
try { try {
setReplicaStates(nodeId, Replica.State.ACTIVE, collections); setReplicaStates(nodeId, Replica.State.ACTIVE, collections);
if (!collections.isEmpty()) { if (!collections.isEmpty()) {
collectionsStatesRef.set(null); collections.forEach(c -> collectionsStatesRef.get(c).invalidate());
simRunLeaderElection(collections, true); simRunLeaderElection(collections, true);
return true; return true;
} else { } else {
@ -604,7 +725,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
cloudManager.getMetricManager().registerGauge(null, registry, cloudManager.getMetricManager().registerGauge(null, registry,
() -> replicaSize, "", true, Type.CORE_IDX.metricsAttribute); () -> replicaSize, "", true, Type.CORE_IDX.metricsAttribute);
// at this point nuke our cached DocCollection state // at this point nuke our cached DocCollection state
collectionsStatesRef.set(null); collectionsStatesRef.get(replicaInfo.getCollection()).invalidate();
log.trace("-- simAddReplica {}", replicaInfo); log.trace("-- simAddReplica {}", replicaInfo);
if (runLeaderElection) { if (runLeaderElection) {
simRunLeaderElection(replicaInfo.getCollection(), replicaInfo.getShard(), true); simRunLeaderElection(replicaInfo.getCollection(), replicaInfo.getShard(), true);
@ -633,7 +754,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
colShardReplicaMap.computeIfAbsent(ri.getCollection(), c -> new ConcurrentHashMap<>()) colShardReplicaMap.computeIfAbsent(ri.getCollection(), c -> new ConcurrentHashMap<>())
.computeIfAbsent(ri.getShard(), s -> new ArrayList<>()) .computeIfAbsent(ri.getShard(), s -> new ArrayList<>())
.remove(ri); .remove(ri);
collectionsStatesRef.set(null); collectionsStatesRef.get(ri.getCollection()).invalidate();
opDelay(ri.getCollection(), CollectionParams.CollectionAction.DELETEREPLICA.name()); opDelay(ri.getCollection(), CollectionParams.CollectionAction.DELETEREPLICA.name());
@ -668,26 +789,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
} }
/**
* Save clusterstate.json to {@link DistribStateManager}.
* @return saved state
*/
private ClusterState saveClusterState(ClusterState state) throws IOException {
ensureNotClosed();
byte[] data = Utils.toJSON(state);
try {
VersionedData oldData = stateManager.getData(ZkStateReader.CLUSTER_STATE);
int version = oldData != null ? oldData.getVersion() : 0;
assert clusterStateVersion == version : "local clusterStateVersion out of sync";
stateManager.setData(ZkStateReader.CLUSTER_STATE, data, version);
log.debug("** saved cluster state version {}", version);
clusterStateVersion++;
} catch (Exception e) {
throw new IOException(e);
}
return state;
}
/** /**
* Delay an operation by a configured amount. * Delay an operation by a configured amount.
* @param collection collection name * @param collection collection name
@ -725,7 +826,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
if (saveClusterState) { if (saveClusterState) {
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
collectionsStatesRef.set(null); collections.forEach(c -> collectionsStatesRef.get(c).invalidate());
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -865,13 +966,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
if (log.isDebugEnabled()) { if (log.isDebugEnabled()) {
log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection, log.debug("-- elected new leader for {} / {} (currentVersion={}): {}", collection,
s.getName(), clusterStateVersion, ri); s.getName(), col.getZNodeVersion(), ri);
} }
stateChanged.set(true); stateChanged.set(true);
} }
} finally { } finally {
if (stateChanged.get() || saveState) { if (stateChanged.get() || saveState) {
collectionsStatesRef.set(null); collectionsStatesRef.get(collection).invalidate();
} }
lock.unlock(); lock.unlock();
} }
@ -889,7 +990,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
boolean waitForFinalState = props.getBool(CommonAdminParams.WAIT_FOR_FINAL_STATE, false); boolean waitForFinalState = props.getBool(CommonAdminParams.WAIT_FOR_FINAL_STATE, false);
final String collectionName = props.getStr(NAME); final String collectionName = props.getStr(NAME);
log.debug("-- simCreateCollection {}, currentVersion={}", collectionName, clusterStateVersion); log.debug("-- simCreateCollection {}", collectionName);
String router = props.getStr("router.name", DocRouter.DEFAULT_NAME); String router = props.getStr("router.name", DocRouter.DEFAULT_NAME);
String policy = props.getStr(Policy.POLICY); String policy = props.getStr(Policy.POLICY);
@ -903,12 +1004,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
CreateCollectionCmd.checkReplicaTypes(props); CreateCollectionCmd.checkReplicaTypes(props);
// always force getting fresh state // always force getting fresh state
lock.lockInterruptibly();
try {
collectionsStatesRef.set(null);
} finally {
lock.unlock();
}
final ClusterState clusterState = getClusterState(); final ClusterState clusterState = getClusterState();
String withCollection = props.getStr(CollectionAdminParams.WITH_COLLECTION); String withCollection = props.getStr(CollectionAdminParams.WITH_COLLECTION);
@ -962,8 +1057,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
CollectionAdminParams.COLOCATED_WITH, collectionName); CollectionAdminParams.COLOCATED_WITH, collectionName);
cmd = new CollectionMutator(cloudManager).modifyCollection(clusterState,message); cmd = new CollectionMutator(cloudManager).modifyCollection(clusterState,message);
} }
// force recreation of collection states collectionsStatesRef.put(collectionName, new CachedCollectionRef(collectionName, 0));
collectionsStatesRef.set(null);
} finally { } finally {
lock.unlock(); lock.unlock();
@ -1043,7 +1137,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
// force recreation of collection states // force recreation of collection states
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
collectionsStatesRef.set(null); collectionsStatesRef.get(collectionName).invalidate();
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -1057,7 +1151,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
} }
results.add("success", ""); results.add("success", "");
log.debug("-- finished createCollection {}, currentVersion={}", collectionName, clusterStateVersion); log.debug("-- finished createCollection {}", collectionName);
} }
/** /**
@ -1106,7 +1200,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
} }
}); });
collectionsStatesRef.set(null); cloudManager.getDistribStateManager().removeRecursively(ZkStateReader.getCollectionPath(collection), true, true);
collectionsStatesRef.remove(collection);
results.add("success", ""); results.add("success", "");
} catch (Exception e) { } catch (Exception e) {
log.warn("Exception", e); log.warn("Exception", e);
@ -1121,7 +1216,13 @@ public class SimClusterStateProvider implements ClusterStateProvider {
public void simDeleteAllCollections() throws Exception { public void simDeleteAllCollections() throws Exception {
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
collectionsStatesRef.set(null); collectionsStatesRef.keySet().forEach(name -> {
try {
cloudManager.getDistribStateManager().removeRecursively(ZkStateReader.getCollectionPath(name), true, true);
} catch (Exception e) {
log.error("Unable to delete collection state.json");
}
});
collProperties.clear(); collProperties.clear();
sliceProperties.clear(); sliceProperties.clear();
@ -1468,7 +1569,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
// invalidate cached state // invalidate cached state
collectionsStatesRef.set(null); collectionsStatesRef.get(collectionName).invalidate();
} finally { } finally {
SplitShardCmd.unlockForSplit(cloudManager, collectionName, sliceName.get()); SplitShardCmd.unlockForSplit(cloudManager, collectionName, sliceName.get());
lock.unlock(); lock.unlock();
@ -1516,7 +1617,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
} }
}); });
collectionsStatesRef.set(null); collectionsStatesRef.get(collectionName).invalidate();
results.add("success", ""); results.add("success", "");
} catch (Exception e) { } catch (Exception e) {
results.add("failure", e.toString()); results.add("failure", e.toString());
@ -2004,7 +2105,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
props.clear(); props.clear();
props.putAll(properties); props.putAll(properties);
} }
collectionsStatesRef.set(null); collectionsStatesRef.get(coll).invalidate();
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -2025,7 +2126,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} else { } else {
props.put(key, value); props.put(key, value);
} }
collectionsStatesRef.set(null); collectionsStatesRef.get(coll).invalidate();
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -2046,7 +2147,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
if (properties != null) { if (properties != null) {
sliceProps.putAll(properties); sliceProps.putAll(properties);
} }
collectionsStatesRef.set(null); collectionsStatesRef.get(coll).invalidate();
} finally { } finally {
lock.unlock(); lock.unlock();
} }
@ -2247,7 +2348,6 @@ public class SimClusterStateProvider implements ClusterStateProvider {
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
final Map<String, Map<String, Object>> stats = new TreeMap<>(); final Map<String, Map<String, Object>> stats = new TreeMap<>();
collectionsStatesRef.set(null);
ClusterState state = getClusterState(); ClusterState state = getClusterState();
state.forEachCollection(coll -> { state.forEachCollection(coll -> {
Map<String, Object> perColl = new LinkedHashMap<>(); Map<String, Object> perColl = new LinkedHashMap<>();
@ -2286,7 +2386,9 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
continue; continue;
} }
AtomicLong buffered = (AtomicLong)sliceProperties.get(coll.getName()).get(s.getName()).get(BUFFERED_UPDATES); AtomicLong buffered = (AtomicLong)sliceProperties
.getOrDefault(coll.getName(), Collections.emptyMap())
.getOrDefault(s.getName(), Collections.emptyMap()).get(BUFFERED_UPDATES);
if (buffered != null) { if (buffered != null) {
bufferedDocs += buffered.get(); bufferedDocs += buffered.get();
} }
@ -2389,7 +2491,7 @@ public class SimClusterStateProvider implements ClusterStateProvider {
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
Map<String, DocCollection> states = getCollectionStates(); Map<String, DocCollection> states = getCollectionStates();
ClusterState state = new ClusterState(clusterStateVersion, liveNodes.get(), states); ClusterState state = new ClusterState(0, liveNodes.get(), states);
return state; return state;
} finally { } finally {
lock.unlock(); lock.unlock();
@ -2399,65 +2501,18 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} }
} }
// this method uses a simple cache in collectionsStatesRef. Operations that modify
// cluster state should always reset this cache so that the changes become visible
private Map<String, DocCollection> getCollectionStates() throws IOException, InterruptedException { private Map<String, DocCollection> getCollectionStates() throws IOException, InterruptedException {
lock.lockInterruptibly(); lock.lockInterruptibly();
try { try {
Map<String, DocCollection> collectionStates = collectionsStatesRef.get(); Map<String, DocCollection> collectionStates = new HashMap<>();
if (collectionStates != null) { collectionsStatesRef.forEach((name, cached) -> {
return collectionStates; try {
} collectionStates.put(name, cached.getColl());
collectionsStatesRef.set(null); } catch (Exception e) {
log.debug("** creating new collection states, currentVersion={}", clusterStateVersion); throw new RuntimeException("error building collection " + name + " state", e);
Map<String, Map<String, Map<String, Replica>>> collMap = new HashMap<>(); }
nodeReplicaMap.forEach((n, replicas) -> {
synchronized (replicas) {
replicas.forEach(ri -> {
Map<String, Object> props;
synchronized (ri) {
props = new HashMap<>(ri.getVariables());
}
props.put(ZkStateReader.NODE_NAME_PROP, n);
props.put(ZkStateReader.CORE_NAME_PROP, ri.getCore());
props.put(ZkStateReader.REPLICA_TYPE, ri.getType().toString());
props.put(ZkStateReader.STATE_PROP, ri.getState().toString());
Replica r = new Replica(ri.getName(), props, ri.getCollection(), ri.getShard());
collMap.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
.computeIfAbsent(ri.getShard(), s -> new HashMap<>())
.put(ri.getName(), r);
});
}
});
// add empty slices
sliceProperties.forEach((c, perSliceProps) -> {
perSliceProps.forEach((slice, props) -> {
collMap.computeIfAbsent(c, co -> new ConcurrentHashMap<>()).computeIfAbsent(slice, s -> new ConcurrentHashMap<>());
});
}); });
// add empty collections return collectionStates;
collProperties.keySet().forEach(c -> {
collMap.computeIfAbsent(c, co -> new ConcurrentHashMap<>());
});
Map<String, DocCollection> res = new HashMap<>();
collMap.forEach((coll, shards) -> {
Map<String, Slice> slices = new HashMap<>();
shards.forEach((s, replicas) -> {
Map<String, Object> sliceProps = sliceProperties.computeIfAbsent(coll, c -> new ConcurrentHashMap<>()).computeIfAbsent(s, sl -> new ConcurrentHashMap<>());
Slice slice = new Slice(s, replicas, sliceProps, coll);
slices.put(s, slice);
});
Map<String, Object> collProps = collProperties.computeIfAbsent(coll, c -> new ConcurrentHashMap<>());
Map<String, Object> routerProp = (Map<String, Object>) collProps.getOrDefault(DocCollection.DOC_ROUTER, Collections.singletonMap("name", DocRouter.DEFAULT_NAME));
DocRouter router = DocRouter.getDocRouter((String)routerProp.getOrDefault("name", DocRouter.DEFAULT_NAME));
DocCollection dc = new DocCollection(coll, slices, collProps, router, clusterStateVersion, ZkStateReader.CLUSTER_STATE);
res.put(coll, dc);
});
saveClusterState(new ClusterState(clusterStateVersion, liveNodes.get(), res));
collectionsStatesRef.set(res);
return res;
} finally { } finally {
lock.unlock(); lock.unlock();
} }

View File

@ -21,6 +21,7 @@ import java.io.UnsupportedEncodingException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
@ -55,8 +56,27 @@ public class SnapshotClusterStateProvider implements ClusterStateProvider {
liveNodes = Set.copyOf((Collection<String>)snapshot.getOrDefault("liveNodes", Collections.emptySet())); liveNodes = Set.copyOf((Collection<String>)snapshot.getOrDefault("liveNodes", Collections.emptySet()));
clusterProperties = (Map<String, Object>)snapshot.getOrDefault("clusterProperties", Collections.emptyMap()); clusterProperties = (Map<String, Object>)snapshot.getOrDefault("clusterProperties", Collections.emptyMap());
Map<String, Object> stateMap = new HashMap<>((Map<String, Object>)snapshot.getOrDefault("clusterState", Collections.emptyMap())); Map<String, Object> stateMap = new HashMap<>((Map<String, Object>)snapshot.getOrDefault("clusterState", Collections.emptyMap()));
Number version = (Number)stateMap.remove("version"); Map<String, DocCollection> collectionStates = new HashMap<>();
clusterState = ClusterState.load(version != null ? version.intValue() : null, stateMap, liveNodes, ZkStateReader.CLUSTER_STATE); // back-compat with format = 1
Integer stateVersion = Integer.valueOf(String.valueOf(stateMap.getOrDefault("version", 0)));
stateMap.remove("version");
stateMap.forEach((name, state) -> {
Map<String, Object> mutableState = (Map<String, Object>)state;
Map<String, Object> collMap = (Map<String, Object>) mutableState.get(name);
if (collMap == null) {
// snapshot in format 1
collMap = mutableState;
mutableState = Collections.singletonMap(name, state);
}
Integer version = Integer.parseInt(String.valueOf(collMap.getOrDefault("zNodeVersion", stateVersion)));
String path = String.valueOf(collMap.getOrDefault("zNode", ZkStateReader.getCollectionPath(name)));
collMap.remove("zNodeVersion");
collMap.remove("zNode");
byte[] data = Utils.toJSON(mutableState);
ClusterState collState = ClusterState.load(version, data, Collections.emptySet(), path);
collectionStates.put(name, collState.getCollection(name));
});
clusterState = new ClusterState(stateVersion, liveNodes, collectionStates);
} }
public Map<String, Object> getSnapshot() { public Map<String, Object> getSnapshot() {
@ -67,14 +87,18 @@ public class SnapshotClusterStateProvider implements ClusterStateProvider {
} }
Map<String, Object> stateMap = new HashMap<>(); Map<String, Object> stateMap = new HashMap<>();
snapshot.put("clusterState", stateMap); snapshot.put("clusterState", stateMap);
stateMap.put("version", clusterState.getZNodeVersion());
clusterState.forEachCollection(coll -> { clusterState.forEachCollection(coll -> {
CharArr out = new CharArr(); CharArr out = new CharArr();
JSONWriter writer = new JSONWriter(out, 2); JSONWriter writer = new JSONWriter(out, 2);
coll.write(writer); coll.write(writer);
String json = out.toString(); String json = out.toString();
try { try {
stateMap.put(coll.getName(), Utils.fromJSON(json.getBytes("UTF-8"))); Map<String, Object> collMap = new LinkedHashMap<>((Map<String, Object>)Utils.fromJSON(json.getBytes("UTF-8")));
collMap.put("zNodeVersion", coll.getZNodeVersion());
collMap.put("zNode", coll.getZNode());
// format compatible with the real /state.json, which uses a mini-ClusterState
// consisting of a single collection
stateMap.put(coll.getName(), Collections.singletonMap(coll.getName(), collMap));
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new RuntimeException("should not happen!", e); throw new RuntimeException("should not happen!", e);
} }

View File

@ -229,8 +229,8 @@ public class TestSnapshotCloudManager extends SolrCloudTestCase {
Pattern.compile("/autoscaling/triggerState/.*"), Pattern.compile("/autoscaling/triggerState/.*"),
// some triggers may have run after the snapshot was taken // some triggers may have run after the snapshot was taken
Pattern.compile("/autoscaling/events/.*"), Pattern.compile("/autoscaling/events/.*"),
// we always use format 1 in SimClusterStateProvider
Pattern.compile("/clusterstate\\.json"), Pattern.compile("/clusterstate\\.json"),
Pattern.compile("/collections/[^/]+?/state.json"),
// depending on the startup sequence leaders may differ // depending on the startup sequence leaders may differ
Pattern.compile("/collections/[^/]+?/leader_elect/.*"), Pattern.compile("/collections/[^/]+?/leader_elect/.*"),
Pattern.compile("/collections/[^/]+?/leaders/.*"), Pattern.compile("/collections/[^/]+?/leaders/.*"),
@ -255,6 +255,14 @@ public class TestSnapshotCloudManager extends SolrCloudTestCase {
.filter(STATE_FILTER_FUN).collect(Collectors.toList())); .filter(STATE_FILTER_FUN).collect(Collectors.toList()));
Collections.sort(treeOne); Collections.sort(treeOne);
Collections.sort(treeTwo); Collections.sort(treeTwo);
if (!treeOne.equals(treeTwo)) {
List<String> t1 = new ArrayList<>(treeOne);
t1.removeAll(treeTwo);
log.warn("Only in tree one: " + t1);
List<String> t2 = new ArrayList<>(treeTwo);
t2.removeAll(treeOne);
log.warn("Only in tree two: " + t2);
}
assertEquals(treeOne, treeTwo); assertEquals(treeOne, treeTwo);
for (String path : treeOne) { for (String path : treeOne) {
VersionedData vd1 = one.getData(path); VersionedData vd1 = one.getData(path);