mirror of https://github.com/apache/lucene.git
SOLR-14928: allow cluster state updates to be done in a distributed way and not through Overseer (#2364)
parent 4a42ecd936
commit 23755ddfdd
@@ -201,6 +201,8 @@ Other Changes

* SOLR-15118: Switch /v2/collections APIs over to the now-preferred annotated-POJO implementation approach (Jason Gerlowski)

* SOLR-14928: Allow cluster state updates to be done in a distributed fashion without going through Overseer (Ilan Ginzburg)

Bug Fixes
---------------------

* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution
@@ -0,0 +1,822 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.cloud;

import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.cloud.overseer.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.*;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;
import java.util.*;

import static java.util.Collections.singletonMap;
import static org.apache.solr.cloud.overseer.ZkStateWriter.NO_OP;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTIONS_ZKNODE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICAPROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICAPROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
/**
 * Gives access to distributed cluster state update methods and allows code to inquire whether distributed state update is enabled.
 */
public class DistributedClusterStateUpdater {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * When {@code true}, each node updates Zookeeper directly when changing {@code state.json} files. When {@code false}, messages
   * are instead sent to the Overseer and the update is done there.
   */
  private final boolean useDistributedStateUpdate;

  /**
   * Builds an instance with the specified behavior regarding distribution of state updates. When distributed updates
   * are not enabled (parameter {@code useDistributedStateUpdate} is {@code false}), the instance only reports that fact;
   * when they are enabled (parameter is {@code true}), it gives access to the methods and classes that execute the updates.
   *
   * @param useDistributedStateUpdate when this parameter is {@code false}, the only method expected to ever be called on this
   *                                  instance is {@link #isDistributedStateUpdate}, and it will return {@code false}.
   */
  public DistributedClusterStateUpdater(boolean useDistributedStateUpdate) {
    this.useDistributedStateUpdate = useDistributedStateUpdate;
    if (log.isInfoEnabled()) {
      log.info("Creating DistributedClusterStateUpdater with useDistributedStateUpdate=" + useDistributedStateUpdate
          + ". Solr will be using " + (useDistributedStateUpdate ? "distributed" : "Overseer based") + " cluster state updates."); // nowarn
    }
  }

  /**
   * Create a new instance of {@link StateChangeRecorder} for a given collection and a given intention (collection
   * creation vs. operations on an existing collection).
   */
  public StateChangeRecorder createStateChangeRecorder(String collectionName, boolean isCollectionCreation) {
    if (!useDistributedStateUpdate) {
      // Seeing this exception or any other of this kind here means there's a big bug in the code. No user input can cause this.
      throw new IllegalStateException("Not expecting to create instances of StateChangeRecorder when not using distributed state update");
    }
    return new StateChangeRecorder(collectionName, isCollectionCreation);
  }

  /**
   * Syntactic sugar to allow a single change to the cluster state to be made in a single call.
   */
  public void doSingleStateUpdate(MutatingCommand command, ZkNodeProps message,
                                  SolrCloudManager scm, ZkStateReader zkStateReader) throws KeeperException, InterruptedException {
    if (!useDistributedStateUpdate) {
      throw new IllegalStateException("Not expecting to execute doSingleStateUpdate when not using distributed state update");
    }
    String collectionName = command.getCollectionName(message);
    final StateChangeRecorder scr = new StateChangeRecorder(collectionName, command.isCollectionCreation());
    scr.record(command, message);
    scr.executeStateUpdates(scm, zkStateReader);
  }

  public void executeNodeDownStateUpdate(String nodeName, ZkStateReader zkStateReader) {
    if (!useDistributedStateUpdate) {
      throw new IllegalStateException("Not expecting to execute executeNodeDownStateUpdate when not using distributed state update");
    }
    CollectionNodeDownChangeCalculator.executeNodeDownStateUpdate(nodeName, zkStateReader);
  }

  /**
   * When this method returns {@code false} the legacy behavior of enqueueing cluster state update messages to Overseer
   * should be used and no other method of this class should be called.
   */
  public boolean isDistributedStateUpdate() {
    return useDistributedStateUpdate;
  }
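
  // Illustrative sketch, not part of the original patch: callers are expected to branch on
  // isDistributedStateUpdate() and either apply the change directly or enqueue it to the Overseer,
  // mirroring the pattern this commit introduces in ShardLeaderElectionContextBase. The variables
  // below (zkController, m, zkStateReader) are assumed to be in scope at the call site.
  //
  //   if (zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
  //     zkController.getDistributedClusterStateUpdater().doSingleStateUpdate(
  //         DistributedClusterStateUpdater.MutatingCommand.SliceSetShardLeader, m,
  //         zkController.getSolrCloudManager(), zkStateReader);
  //   } else {
  //     zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));
  //   }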

  /**
   * Enum instances are named for the mutator class (e.g. {@code Cluster} for {@link ClusterStateMutator} or
   * {@code Collection} for {@link CollectionMutator}) followed by the method name of the mutator.
   * For example {@link #SliceAddReplica} represents {@link SliceMutator#addReplica}.
   * <p>
   * Even though the various mutator classes do not implement any common interface, luckily their constructors and methods
   * take the same set of parameters so all can be called from the enum method {@link #buildWriteCommand(SolrCloudManager, ClusterState, ZkNodeProps)}.
   * <p>
   * Given that {@link OverseerAction#DOWNNODE} is different (it returns a list of write commands and impacts more than one collection),
   * it is handled specifically in {@link CollectionNodeDownChangeCalculator#executeNodeDownStateUpdate}.
   */
  public enum MutatingCommand {
    BalanceShardsUnique(BALANCESHARDUNIQUE, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        ExclusiveSliceProperty dProp = new ExclusiveSliceProperty(cs, message);
        // Next line is where the actual work is done
        if (dProp.balanceProperty()) {
          return new ZkWriteCommand(getCollectionName(message), dProp.getDocCollection());
        } else {
          return NO_OP;
        }
      }
    },
    ClusterCreateCollection(CREATE, CommonParams.NAME) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new ClusterStateMutator(scm).createCollection(cs, message);
      }

      @Override
      public boolean isCollectionCreation() {
        return true;
      }
    },
    ClusterDeleteCollection(DELETE, CommonParams.NAME) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new ClusterStateMutator(scm).deleteCollection(cs, message);
      }
    },
    CollectionDeleteShard(DELETESHARD, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new CollectionMutator(scm).deleteShard(cs, message);
      }
    },
    CollectionModifyCollection(MODIFYCOLLECTION, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new CollectionMutator(scm).modifyCollection(cs, message);
      }
    },
    CollectionCreateShard(CREATESHARD, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new CollectionMutator(scm).createShard(cs, message);
      }
    },
    ReplicaAddReplicaProperty(ADDREPLICAPROP, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new ReplicaMutator(scm).addReplicaProperty(cs, message);
      }
    },
    ReplicaDeleteReplicaProperty(DELETEREPLICAPROP, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new ReplicaMutator(scm).deleteReplicaProperty(cs, message);
      }
    },
    ReplicaSetState(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new ReplicaMutator(scm).setState(cs, message);
      }
    },
    SliceAddReplica(ADDREPLICA, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).addReplica(cs, message);
      }
    },
    SliceAddRoutingRule(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).addRoutingRule(cs, message);
      }
    },
    SliceRemoveReplica(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).removeReplica(cs, message);
      }
    },
    SliceRemoveRoutingRule(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).removeRoutingRule(cs, message);
      }
    },
    SliceSetShardLeader(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).setShardLeader(cs, message);
      }
    },
    SliceUpdateShardState(null, ZkStateReader.COLLECTION_PROP) {
      @Override
      public ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message) {
        return new SliceMutator(scm).updateShardState(cs, message);
      }
    };

    private static final EnumMap<CollectionParams.CollectionAction, MutatingCommand> actionsToCommands;

    static {
      actionsToCommands = new EnumMap<>(CollectionParams.CollectionAction.class);
      for (MutatingCommand mc : MutatingCommand.values()) {
        if (mc.collectionAction != null) {
          actionsToCommands.put(mc.collectionAction, mc);
        }
      }
    }

    private final CollectionParams.CollectionAction collectionAction;
    private final String collectionNameParamName;

    MutatingCommand(CollectionParams.CollectionAction collectionAction, String collectionNameParamName) {
      this.collectionAction = collectionAction;
      this.collectionNameParamName = collectionNameParamName;
    }

    /**
     * Mutating commands that return a single {@link ZkWriteCommand} override this method.
     */
    public abstract ZkWriteCommand buildWriteCommand(SolrCloudManager scm, ClusterState cs, ZkNodeProps message);

    public String getCollectionName(ZkNodeProps message) {
      return message.getStr(collectionNameParamName);
    }

    /**
     * @return the {@link MutatingCommand} corresponding to the passed {@link org.apache.solr.common.params.CollectionParams.CollectionAction} or
     * {@code null} if no cluster state update command is defined for that action (given that {@link org.apache.solr.common.params.CollectionParams.CollectionAction}
     * values are used for the Collection API and only some are used for cluster state updates, this is expected).
     */
    public static MutatingCommand getCommandFor(CollectionParams.CollectionAction collectionAction) {
      return actionsToCommands.get(collectionAction);
    }

    /**
     * Given that only one command ({@link #ClusterCreateCollection}) creates a collection, the default implementation
     * returning {@code false} is provided here.
     */
    public boolean isCollectionCreation() {
      return false;
    }
  }
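
  // Illustrative sketch, not part of the original patch: mapping a Collection API action to its cluster
  // state update command via getCommandFor(). The variables scm, clusterState and message are assumed to
  // be in scope; getCommandFor() returns null for actions that have no cluster state update command.
  //
  //   MutatingCommand command = MutatingCommand.getCommandFor(CollectionParams.CollectionAction.ADDREPLICA);
  //   if (command != null) {
  //     ZkWriteCommand zkcmd = command.buildWriteCommand(scm, clusterState, message);
  //     // zkcmd captures how the mutation impacts the collection and can then be applied to state.json.
  //   }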

  /**
   * Instances of this class are the fundamental building block of the CAS (Compare and Swap) update approach. These instances
   * accept an initial cluster state (essentially as present in Zookeeper) and apply to it a set of modifications that are
   * then attempted to be written back to Zookeeper ({@link ZkUpdateApplicator} drives this process).
   * If the update fails (due to a concurrent update), the Zookeeper content is read again, the changes (updates) are
   * applied to it again and a new write attempt is made. This guarantees that an update does not overwrite data just
   * written by a concurrent update happening from the same or from another node.
   */
  interface StateChangeCalculator {
    String getCollectionName();

    /**
     * @return {@code true} if this updater is computing updates for creating a collection that does not exist yet.
     */
    boolean isCollectionCreation();

    /**
     * Given an initial {@link ClusterState}, computes after applying updates the cluster state to be written to state.json
     * (made available through {@link #getUpdatedClusterState()}) as well as the list of per replica operations (made available
     * through {@link #getPerReplicaStatesOps()}). Either or both of these methods will return {@code null} if there is no
     * corresponding update to apply.
     */
    void computeUpdates(ClusterState currentState);

    /**
     * Method can only be called after {@link #computeUpdates} has been called.
     * @return the new state to write into {@code state.json} or {@code null} if no update is needed.
     */
    ClusterState getUpdatedClusterState();

    /**
     * Method can only be called after {@link #computeUpdates} has been called.
     * @return {@code null} when there are no per replica state ops.
     */
    List<PerReplicaStatesOps> getPerReplicaStatesOps();
  }
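
  // Illustrative sketch, not part of the original patch: the minimal shape of a StateChangeCalculator
  // implementation. The concrete implementations in this file are RecordedMutationsPlayer and
  // CollectionNodeDownChangeCalculator; the class and field names below are hypothetical.
  //
  //   class SingleCollectionCalculator implements StateChangeCalculator {
  //     private ClusterState computedState;           // null until computeUpdates() runs
  //     private List<PerReplicaStatesOps> replicaOps; // null when no per replica state updates
  //     public String getCollectionName() { return "myCollection"; }
  //     public boolean isCollectionCreation() { return false; }
  //     public void computeUpdates(ClusterState currentState) { /* build computedState and replicaOps */ }
  //     public ClusterState getUpdatedClusterState() { return computedState; }
  //     public List<PerReplicaStatesOps> getPerReplicaStatesOps() { return replicaOps; }
  //   }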

  /**
   * This class is passed a {@link StateChangeCalculator} targeting a single collection that is able to apply an update to an
   * initial cluster state and return the updated cluster state. The {@link StateChangeCalculator} is used (possibly multiple times)
   * to do a Compare And Swap (a.k.a. conditional update or CAS) of the collection's {@code state.json} Zookeeper file.<p>
   *
   * When there are per replica states to update, they are attempted once (they do their own Compare And Swap), before
   * the (potentially multiple) attempts to update the {@code state.json} file. This conforms to the strategy in place
   * when {@code state.json} updates are sent to the Overseer to perform. See {@link ZkStateWriter#writePendingUpdates}.
   */
  static private class ZkUpdateApplicator {
    /**
     * When trying to update a {@code state.json} file that keeps getting changed by a concurrent updater, the number of attempts
     * made before giving up. This is likely way too high: if we get to 50 failed attempts, something else has gone wrong.
     * To be reconsidered once Collection API commands are distributed as well.
     */
    public static final int CAS_MAX_ATTEMPTS = 50;

    private final ZkStateReader zkStateReader;
    private final StateChangeCalculator updater;

    static void applyUpdate(ZkStateReader zkStateReader, StateChangeCalculator updater) throws KeeperException, InterruptedException {
      ZkUpdateApplicator zua = new ZkUpdateApplicator(zkStateReader, updater);
      zua.applyUpdate();
    }

    private ZkUpdateApplicator(ZkStateReader zkStateReader, StateChangeCalculator updater) {
      this.zkStateReader = zkStateReader;
      this.updater = updater;
    }

    /**
     * By delegating work to {@link PerReplicaStatesOps} for per replica state updates, and using optimistic locking
     * (with retries) to directly update the content of {@code state.json}, updates Zookeeper with the changes computed
     * by the {@link StateChangeCalculator}.
     */
    private void applyUpdate() throws KeeperException, InterruptedException {
      /* Initial slightly naive implementation (later on we should consider some caching between updates...).
       * For updates:
       * - Read the state.json file from Zookeeper
       * - Run the updater to execute the changes on top of that file
       * - Compare and Swap the file with the new version (fail if something else changed ZK in the meantime)
       * - Retry all the above steps a few times if the update fails.
       *
       * For creations:
       * - Build the state.json file using the updater
       * - Try to write it to Zookeeper (do not overwrite if it exists)
       * - Fail (without retries) if the write failed.
       */

      // Note we DO NOT track nor use the live nodes in the cluster state.
      // That may mean the two abstractions (collection metadata vs. nodes) should be separated.
      // For now trying to diverge as little as possible from existing data structures and code given the need to
      // support both the old way (Overseer) and new way (distributed) of handling cluster state updates.
      final Set<String> liveNodes = Collections.emptySet();

      // Per Replica States updates are done before all other updates and are not subject to the number of CAS attempts
      // made here, given they have their own CAS strategy and implementation (see PerReplicaStatesOps.persist()).
      boolean firstAttempt = true;

      // When there are multiple retries of the state.json write and the cluster state gets updated over and over again with
      // the changes done in the per replica states, we avoid refetching those multiple times.
      PerReplicaStates fetchedPerReplicaStates = null;

      // Later on (when Collection API commands are distributed) we will have to rely on the version of state.json
      // to implement the replacement of Collection API locking. Then we should not blindly retry cluster state updates
      // as we do here but instead intelligently fail (or retry completely) the Collection API call when seeing that
      // state.json was changed by a concurrent command execution.
      // The loop below is ok for distributing cluster state updates from Overseer to all nodes while Collection API
      // commands are still executed on the Overseer and manage their locking the old fashioned way.
      for (int attempt = 0; attempt < CAS_MAX_ATTEMPTS; attempt++) {
        // Start by reading the current state.json (if this is an update).
        // TODO Eventually rethink the way each node manages and caches its copy of the cluster state. Knowing about all collections in the cluster might not be needed.
        ClusterState initialClusterState;
        if (updater.isCollectionCreation()) {
          initialClusterState = new ClusterState(liveNodes, Collections.emptyMap());
        } else {
          // Get the state for existing data in ZK (and if no data exists we should fail)
          initialClusterState = fetchStateForCollection();
        }

        // Apply the desired changes. Note that the cluster state passed to the chain of mutators is totally up to date
        // (it's read from ZK just above). So assumptions made in the mutators (like SliceMutator.removeReplica() deleting
        // the whole collection if it's not found) are ok. Actually in the removeReplica case, the collection will always
        // exist, otherwise the call to fetchStateForCollection() above would have failed.
        updater.computeUpdates(initialClusterState);

        ClusterState updatedState = updater.getUpdatedClusterState();
        List<PerReplicaStatesOps> allStatesOps = updater.getPerReplicaStatesOps();

        if (firstAttempt && allStatesOps != null) {
          // Do the per replica states updates (if any) before the state.json update (if any)
          firstAttempt = false;

          // The parent node of the per replica state nodes happens to be the node of state.json.
          String prsParentNode = ZkStateReader.getCollectionPath(updater.getCollectionName());

          for (PerReplicaStatesOps prso : allStatesOps) {
            prso.persist(prsParentNode, zkStateReader.getZkClient());
          }
        }

        if (updatedState == null) {
          // No update to state.json needed
          return;
        }

        // Get the latest version of the collection from the cluster state first.
        // There is no notion of "cached" here (the boolean passed below) as the updatedState is based on CollectionRef.
        DocCollection docCollection = updatedState.getCollectionOrNull(updater.getCollectionName(), true);

        // If we did update per replica states and we're also updating state.json, update the content of state.json to reflect
        // the changes made to replica states. Not strictly necessary (the state source of truth is in per replica states), but nice to have...
        if (allStatesOps != null) {
          if (docCollection != null) {
            // Fetch the per replica states updates done previously or skip fetching if we already have them
            fetchedPerReplicaStates = PerReplicaStates.fetch(docCollection.getZNode(), zkStateReader.getZkClient(), fetchedPerReplicaStates);
            // Transpose the per replica states into the cluster state
            updatedState = updatedState.copyWith(updater.getCollectionName(), docCollection.copyWith(fetchedPerReplicaStates));
          }
        }

        try {
          // Try to do a conditional update (a.k.a. CAS: compare and swap).
          doStateDotJsonCasUpdate(updatedState);
          return; // state.json updated successfully.
        } catch (KeeperException.BadVersionException bve) {
          if (updater.isCollectionCreation()) {
            // Not expecting to see this exception when creating a new state.json fails, so throwing it up the food chain.
            throw bve;
          }
        }
        // We've tried to update an existing state.json and got a BadVersionException. We'll try again a few times.
        // When only two threads compete, no point in waiting: if we lost this time we'll get it next time right away.
        // But if more threads compete, then waiting a bit (random delay) can improve our chances. The delay should likely
        // be proportional to the time between reading the cluster state and updating it. We can measure it in the loop above.
        // With "per replica states" collections, concurrent attempts of even just two threads are expected to be extremely rare.
      }

      // We made quite a few attempts but failed repeatedly. This is pretty bad but we can't loop trying forever.
      // Offering a job to the Overseer wouldn't usually fail if the ZK queue can be written to (but the Overseer can then
      // loop forever attempting the update).
      // We do want whoever called us to fail right away rather than to wait for a cluster change and timeout because it
      // didn't happen. Likely need to review call by call what the appropriate behaviour is, especially once the Collection
      // API is distributed (because then the Collection API call will fail if the underlying cluster state update cannot
      // be done, and that's a desirable thing).
      throw new KeeperException.BadVersionException(ZkStateReader.getCollectionPath(updater.getCollectionName()));
    }

    /**
     * After the computing of the new {@link ClusterState} containing all needed updates to the collection based on what the
     * {@link StateChangeCalculator} computed, this method does an update in ZK to the collection's {@code state.json}. It is the
     * equivalent of Overseer's {@link ZkStateWriter#writePendingUpdates} (in its actions related to {@code state.json}
     * as opposed to the per replica states).
     * <p>
     * Note that in a similar way to what happens in {@link ZkStateWriter#writePendingUpdates}, collection delete is handled
     * as a special case. (see comment on {@link DistributedClusterStateUpdater.StateChangeRecorder.RecordedMutationsPlayer}
     * on why the code has to be duplicated)<p>
     *
     * <b>Note for the future:</b> Given this method is where the actual write to ZK is done, that's the place where we
     * can rebuild a DocCollection with an updated zk version. Eventually, if we maintain a cache of recently used collections,
     * we want to capture the updated collection and put it in the cache to avoid reading it again (unless it changed,
     * in which case the CAS will fail and we will refresh).<p>
     *
     * This could serve as the basis for a strategy where each node does not need any view of all collections in the cluster
     * but only a cache of recently used collections (possibly not even needing watches on them, but we'll discuss this later).
     */
    private void doStateDotJsonCasUpdate(ClusterState updatedState) throws KeeperException, InterruptedException {
      String jsonPath = ZkStateReader.getCollectionPath(updater.getCollectionName());

      // Collection delete
      if (!updatedState.hasCollection(updater.getCollectionName())) {
        // We do not have a collection znode version to check that we delete the right version of state.json. But this doesn't really matter:
        // if we had one, and the delete failed (because state.json got updated in the meantime), we would re-read the collection
        // state, update our version, run the CAS delete again and it would pass. Which means that one way or another, deletes are final.
        // I hope nobody deletes a collection then creates a new one with the same name immediately (although the creation should fail
        // if the znode still exists, so the creation would only succeed after the delete made it, and we're ok).
        // With Overseer based updates the same behavior can be observed: a collection update is enqueued followed by the
        // collection delete before the update was executed.
        log.debug("going to recursively delete state.json at {}", jsonPath);
        zkStateReader.getZkClient().clean(jsonPath);
      } else {
        // Collection update or creation
        DocCollection collection = updatedState.getCollection(updater.getCollectionName());
        byte[] stateJson = Utils.toJSON(singletonMap(updater.getCollectionName(), collection));

        if (updater.isCollectionCreation()) {
          // The state.json file does not exist yet (more precisely it is assumed not to exist)
          log.debug("going to create collection {}", jsonPath);
          zkStateReader.getZkClient().create(jsonPath, stateJson, CreateMode.PERSISTENT, true);
        } else {
          // We're updating an existing state.json
          if (log.isDebugEnabled()) {
            log.debug("going to update collection {} version: {}", jsonPath, collection.getZNodeVersion());
          }
          zkStateReader.getZkClient().setData(jsonPath, stateJson, collection.getZNodeVersion(), true);
        }
      }
    }

    /**
     * Creates a {@link ClusterState} with the state of an existing single collection, with no live nodes information.
     * Eventually this state should be reused across calls if it is fresh enough... (we have to deal anyway with failures
     * of conditional updates so trying to use non-fresh data is ok, a second attempt will be made)
     */
    private ClusterState fetchStateForCollection() throws KeeperException, InterruptedException {
      String collectionStatePath = ZkStateReader.getCollectionPath(updater.getCollectionName());
      Stat stat = new Stat();
      byte[] data = zkStateReader.getZkClient().getData(collectionStatePath, null, stat, true);
      ClusterState clusterState = ClusterState.createFromJson(stat.getVersion(), data, Collections.emptySet());
      return clusterState;
    }
  }
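
  // Illustrative sketch, not part of the original patch: ZkUpdateApplicator is always driven through a
  // StateChangeCalculator, as done below by StateChangeRecorder.executeStateUpdates() (via
  // RecordedMutationsPlayer) and by CollectionNodeDownChangeCalculator.executeNodeDownStateUpdate():
  //
  //   StateChangeCalculator calculator = new CollectionNodeDownChangeCalculator(collectionName, nodeName);
  //   ZkUpdateApplicator.applyUpdate(zkStateReader, calculator);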

  /**
   * Class handling distributed updates of a collection's Zookeeper {@code state.json} file based on multiple updates
   * applied to a single collection (as is sometimes done by *Cmd classes implementing the Collection API commands).<p>
   * Previously these updates were sent one by one to the Overseer and then grouped by org.apache.solr.cloud.Overseer.ClusterStateUpdater.
   * <p>
   * Records desired changes to {@code state.json} files in Zookeeper (as are done by the family of mutator classes such as
   * {@link org.apache.solr.cloud.overseer.ClusterStateMutator}, {@link org.apache.solr.cloud.overseer.CollectionMutator}
   * etc.) in order to be able to later execute them on the actual content of the {@code state.json} files using optimistic
   * locking (and retry a few times if the optimistic locking failed).
   * <p>
   * Instances are <b>not</b> thread safe.
   */
  public static class StateChangeRecorder {
    final List<Pair<MutatingCommand, ZkNodeProps>> mutations;
    /**
     * The collection name to which all recorded commands apply.
     */
    final String collectionName;
    /**
     * {@code true} if recorded commands assume creation of the collection {@code state.json} file.<br>
     * {@code false} if an existing {@code state.json} is to be updated.<p>
     * <p>
     * This variable is used for defensive programming and catching issues. It might be removed once we're done developing
     * and testing the distribution of cluster state updates.
     */
    final boolean isCollectionCreation;

    /**
     * For collection creation recording, there should be only one actual creation (and it should be the first recorded command).
     */
    boolean creationCommandRecorded = false;

    private StateChangeRecorder(String collectionName, boolean isCollectionCreation) {
      if (collectionName == null) {
        final String err = "Internal bug. collectionName=null (isCollectionCreation=" + isCollectionCreation + ")";
        log.error(err);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, err);
      }
      mutations = new LinkedList<>();
      this.collectionName = collectionName;
      this.isCollectionCreation = isCollectionCreation;
    }

    /**
     * Records a mutation method and its parameters so that it can be executed later to modify the corresponding Zookeeper state.
     * Note the message is identical to the one used for communicating with the Overseer (at least initially) so it also contains
     * the action in parameter {@link org.apache.solr.cloud.Overseer#QUEUE_OPERATION}, but that value is ignored here
     * in favor of the value passed in {@code command}.
     *
     * @param message the parameters associated with the command that are kept in the recorded mutations to be played
     *                later. Note that this call usually replaces a call to {@link org.apache.solr.cloud.Overseer#offerStateUpdate(byte[])}
     *                that is passed a <b>copy</b> of the data!<br>
     *                This means that if {@code message} passed in here is reused before the recorded commands are replayed,
     *                things will break! Need to make sure all places calling this method do not reuse the data passed in
     *                (otherwise need to make a copy).
     */
    public void record(MutatingCommand command, ZkNodeProps message) {
      if (isCollectionCreation && !creationCommandRecorded) {
        // First received command should be the collection creation
        if (!command.isCollectionCreation()) {
          final String err = "Internal bug. Creation of collection " + collectionName + " unexpected command " + command.name();
          log.error(err);
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, err);
        }
        creationCommandRecorded = true;
      } else {
        // If collection creation was already received or is not expected, we should not get (another) one
        if (command.isCollectionCreation()) {
          final String err = "Internal bug. Creation of collection " + collectionName + " unexpected command " +
              command.name() + " (isCollectionCreation=" + isCollectionCreation + ", creationCommandRecorded=" + creationCommandRecorded + ")";
          log.error(err);
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, err);
        }
      }

      if (!collectionName.equals(command.getCollectionName(message))) {
        // All recorded commands must be for the same collection
        final String err = "Internal bug. State change for collection " + collectionName +
            " received command " + command + " for collection " + command.getCollectionName(message);
        log.error(err);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, err);
      }

      mutations.add(new Pair<>(command, message));
    }

    /**
     * This class allows taking the initial (passed in) cluster state, applying cluster mutations to it and returning the resulting
     * cluster state.
     * <p>
     * It makes it possible to apply a set of changes to the cluster state multiple times, when the Compare And Swap (conditional
     * update) fails due to concurrent modification.
     * <p>
     * For each mutation, a {@link ZkWriteCommand} is first created (capturing how the mutation impacts the cluster state); this is
     * the equivalent of what the Overseer is doing in ClusterStateUpdater.processMessage().<p>
     * <p>
     * Then, a new {@link ClusterState} is built by replacing the existing collection by its new value as computed in the
     * {@link ZkWriteCommand}. This is done by Overseer in {@link ZkStateWriter#enqueueUpdate} (and {@link ZkStateWriter} is hard
     * to reuse because although it contains the logic for doing the update that would be needed here, it is coupled with the
     * actual instance of {@link ClusterState} being maintained, the stream of updates to be applied to it and applying
     * the per replica state changes).
     */
    private static class RecordedMutationsPlayer implements StateChangeCalculator {
      private final SolrCloudManager scm;
      private final String collectionName;
      private final boolean isCollectionCreation;
      final List<Pair<MutatingCommand, ZkNodeProps>> mutations;

      // null means no update to state.json needed. Set in computeUpdates()
      private ClusterState computedState = null;

      // null means no updates needed to the per replica state znodes. Set in computeUpdates()
      private List<PerReplicaStatesOps> replicaOpsList = null;

      RecordedMutationsPlayer(SolrCloudManager scm, String collectionName, boolean isCollectionCreation, List<Pair<MutatingCommand, ZkNodeProps>> mutations) {
        this.scm = scm;
        this.collectionName = collectionName;
        this.isCollectionCreation = isCollectionCreation;
        this.mutations = mutations;
      }

      @Override
      public String getCollectionName() {
        return collectionName;
      }

      @Override
      public boolean isCollectionCreation() {
        return isCollectionCreation;
      }

      @Override
      public void computeUpdates(ClusterState clusterState) {
        boolean hasJsonUpdates = false;
        List<PerReplicaStatesOps> perReplicaStateOps = new LinkedList<>();
        for (Pair<MutatingCommand, ZkNodeProps> mutation : mutations) {
          MutatingCommand mutatingCommand = mutation.first();
          ZkNodeProps message = mutation.second();
          try {
            ZkWriteCommand zkcmd = mutatingCommand.buildWriteCommand(scm, clusterState, message);
            if (zkcmd != ZkStateWriter.NO_OP) {
              hasJsonUpdates = true;
              clusterState = clusterState.copyWith(zkcmd.name, zkcmd.collection);
            }
            if (zkcmd.ops != null && zkcmd.ops.get() != null) {
              perReplicaStateOps.add(zkcmd.ops);
            }
          } catch (Exception e) {
            // Seems weird to skip rather than fail, but that's what Overseer is doing (see ClusterStateUpdater.processQueueItem()).
            // Maybe in the new distributed update world we should make the caller fail? (something the Overseer cluster state updater can't do)
            // To be reconsidered once Collection API commands are distributed, because then cluster updates are done synchronously and
            // have the opportunity to make the Collection API call fail directly.
            log.error("Distributed cluster state update could not process the current cluster state update message, skipping the message: {}", message, e);
          }
        }

        computedState = hasJsonUpdates ? clusterState : null;
        replicaOpsList = perReplicaStateOps.isEmpty() ? null : perReplicaStateOps;
      }

      @Override
      public ClusterState getUpdatedClusterState() {
        return computedState;
      }

      @Override
      public List<PerReplicaStatesOps> getPerReplicaStatesOps() {
        return replicaOpsList;
      }
    }

    /**
     * Using optimistic locking (and retries when needed), updates Zookeeper with the changes previously recorded by calls
     * to {@link #record(MutatingCommand, ZkNodeProps)}.
     */
    public void executeStateUpdates(SolrCloudManager scm, ZkStateReader zkStateReader) throws KeeperException, InterruptedException {
      if (log.isDebugEnabled()) {
        log.debug("Executing updates for collection " + collectionName + ", is creation=" + isCollectionCreation + ", " + mutations.size() + " recorded mutations.", new Exception("StackTraceOnly")); // nowarn
      }
      if (mutations.isEmpty()) {
        final String err = "Internal bug. Unexpected empty set of mutations to apply for collection " + collectionName;
        log.error(err);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, err);
      }

      RecordedMutationsPlayer mutationPlayer = new RecordedMutationsPlayer(scm, collectionName, isCollectionCreation, mutations);
      ZkUpdateApplicator.applyUpdate(zkStateReader, mutationPlayer);

      // TODO update stats here for the various commands executed successfully or not?
      // This would replace the stats about cluster state updates that the Collection API currently makes available using
      // the OVERSEERSTATUS command, but obviously would be per node and will not have stats about queues (since there
      // will be no queues). Would be useful in some tests though, for example TestSkipOverseerOperations.
      // Probably better to rethink what types of stats are expected from a distributed system rather than trying to present
      // those previously provided by a central server in the system (the Overseer).
    }
  }
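
  // Illustrative sketch, not part of the original patch: typical use of StateChangeRecorder by a Collection API
  // command when distributed state updates are enabled. Variable names (updater, scm, zkStateReader and the
  // two message variables) and the specific commands recorded are hypothetical.
  //
  //   StateChangeRecorder recorder = updater.createStateChangeRecorder("myCollection", false);
  //   recorder.record(MutatingCommand.CollectionCreateShard, createShardMessage);
  //   recorder.record(MutatingCommand.SliceAddReplica, addReplicaMessage);
  //   recorder.executeStateUpdates(scm, zkStateReader); // applies all recorded mutations with CAS retries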

  /**
   * This class handles the changes to be made as a result of a {@link OverseerAction#DOWNNODE} event.<p>
   *
   * Instances of this class deal with a single collection. Static method {@link #executeNodeDownStateUpdate} is the entry point
   * dealing with a node going down and processing all collections.
   */
  private static class CollectionNodeDownChangeCalculator implements StateChangeCalculator {
    private final String collectionName;
    private final String nodeName;

    // null means no update to state.json needed. Set in computeUpdates()
    private ClusterState computedState = null;

    // null means no updates needed to the per replica state znodes. Set in computeUpdates()
    private List<PerReplicaStatesOps> replicaOpsList = null;

    /**
     * Entry point to mark all replicas of all collections present on a single node as being DOWN (because the node is down).
     */
    public static void executeNodeDownStateUpdate(String nodeName, ZkStateReader zkStateReader) {
      // This code does a version of what NodeMutator.downNode() is doing. We can't assume we have a cache of the collections,
      // so we're going to read all of them from ZK, fetch the state.json for each and, if it has any replicas on the
      // failed node, do an update (conditional of course) of the state.json

      // For Per Replica States collections there is still a need to read state.json, but the update of state.json is replaced
      // by a few znode deletions and creations. Might be faster or slower overall, depending on the number of impacted
      // replicas of such a collection and the total size of that collection's state.json.

      // Note code here also has to duplicate some of the work done in ZkStateReader because ZkStateReader couples reading of
      // the cluster state and maintaining a cached copy of the cluster state. Something likely to be refactored later (once
      // Overseer is totally removed and Zookeeper access patterns become clearer).

      log.debug("DownNode state change invoked for node: {}", nodeName);

      try {
        final List<String> collectionNames = zkStateReader.getZkClient().getChildren(COLLECTIONS_ZKNODE, null, true);

        // Collections are totally independent of each other. Multiple threads could share the load here (need a ZK connection for each though).
        for (String collectionName : collectionNames) {
          CollectionNodeDownChangeCalculator collectionUpdater = new CollectionNodeDownChangeCalculator(collectionName, nodeName);
          ZkUpdateApplicator.applyUpdate(zkStateReader, collectionUpdater);
        }
      } catch (Exception e) {
        if (e instanceof InterruptedException) {
          Thread.currentThread().interrupt();
        }
        // Overseer behavior is to log an error and carry on when a message fails. See Overseer.ClusterStateUpdater.processQueueItem()
        log.error("Could not successfully process DOWNNODE, giving up", e);
      }
    }

    private CollectionNodeDownChangeCalculator(String collectionName, String nodeName) {
      this.collectionName = collectionName;
      this.nodeName = nodeName;
    }

    @Override
    public String getCollectionName() {
      return collectionName;
    }

    @Override
    public boolean isCollectionCreation() {
      return false;
    }

    @Override
    public void computeUpdates(ClusterState clusterState) {
      final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName);
      Optional<ZkWriteCommand> result = docCollection != null ? NodeMutator.computeCollectionUpdate(nodeName, collectionName, docCollection) : Optional.empty();

      if (docCollection == null) {
        // This is possible but should be rare. Logging a warning in case it is seen often, as that would likely be a sign of another issue.
        log.warn("Processing DOWNNODE, collection " + collectionName + " disappeared during iteration"); // nowarn
      }

      if (result.isPresent()) {
        ZkWriteCommand zkcmd = result.get();
        computedState = (zkcmd != ZkStateWriter.NO_OP) ? clusterState.copyWith(zkcmd.name, zkcmd.collection) : null;
        replicaOpsList = (zkcmd.ops != null && zkcmd.ops.get() != null) ? Collections.singletonList(zkcmd.ops) : null;
      } else {
        computedState = null;
        replicaOpsList = null;
      }
    }

    @Override
    public ClusterState getUpdatedClusterState() {
      return computedState;
    }

    @Override
    public List<PerReplicaStatesOps> getPerReplicaStatesOps() {
      return replicaOpsList;
    }
  }
}

@@ -596,8 +596,6 @@ public class Overseer implements SolrCloseable {

  private OverseerThread updaterThread;

  private OverseerThread triggerThread;

  private final ZkStateReader reader;

  private final HttpShardHandler shardHandler;

@@ -616,6 +614,7 @@ public class Overseer implements SolrCloseable {
  private volatile boolean systemCollCompatCheck = true;

  private CloudConfig config;
  private final DistributedClusterStateUpdater distributedClusterStateUpdater;

  // overseer not responsible for closing reader
  public Overseer(HttpShardHandler shardHandler,

@@ -629,6 +628,7 @@ public class Overseer implements SolrCloseable {
    this.zkController = zkController;
    this.stats = new Stats();
    this.config = config;
    this.distributedClusterStateUpdater = new DistributedClusterStateUpdater(config.getDistributedClusterStateUpdates());

    this.solrMetricsContext = new SolrMetricsContext(zkController.getCoreContainer().getMetricManager(), SolrInfoBean.Group.overseer.toString(), metricTag);
  }

@@ -650,7 +650,9 @@ public class Overseer implements SolrCloseable {

    ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");

    OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, getStateUpdateQueue(), adminPath, shardHandler.getShardHandlerFactory());
    // Below is the only non-test usage of the "cluster state update" queue even when distributed cluster state updates are enabled.
    // That queue is used to tell the Overseer to quit. As long as we have an Overseer, we need to support this.
    OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, this, adminPath, shardHandler.getShardHandlerFactory());
    overseerCollectionConfigSetProcessor = new OverseerCollectionConfigSetProcessor(reader, id, shardHandler, adminPath, stats, Overseer.this, overseerPrioritizer, solrMetricsContext);
    ccThread = new OverseerThread(ccTg, overseerCollectionConfigSetProcessor, "OverseerCollectionConfigSetProcessor-" + id);
    ccThread.setDaemon(true);

@@ -814,6 +816,10 @@ public class Overseer implements SolrCloseable {
    return zkController.getSolrCloudManager();
  }

  public DistributedClusterStateUpdater getDistributedClusterStateUpdater() {
    return distributedClusterStateUpdater;
  }

  /**
   * For tests.
   *

@@ -824,15 +830,6 @@ public class Overseer implements SolrCloseable {
    return updaterThread;
  }

  /**
   * For tests.
   * @lucene.internal
   * @return trigger thread
   */
  public synchronized OverseerThread getTriggerThread() {
    return triggerThread;
  }

  public synchronized void close() {
    if (this.id != null) {
      log.info("Overseer (id={}) closing", id);

@@ -863,10 +860,6 @@ public class Overseer implements SolrCloseable {
      IOUtils.closeQuietly(ccThread);
      ccThread.interrupt();
    }
    if (triggerThread != null) {
      IOUtils.closeQuietly(triggerThread);
      triggerThread.interrupt();
    }
    if (updaterThread != null) {
      try {
        updaterThread.join();

@@ -877,14 +870,8 @@ public class Overseer implements SolrCloseable {
        ccThread.join();
      } catch (InterruptedException e) {}
    }
    if (triggerThread != null) {
      try {
        triggerThread.join();
      } catch (InterruptedException e) {}
    }
    updaterThread = null;
    ccThread = null;
    triggerThread = null;
  }

  /**

@@ -904,6 +891,17 @@ public class Overseer implements SolrCloseable {
   * @return a {@link ZkDistributedQueue} object
   */
  ZkDistributedQueue getStateUpdateQueue() {
    if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
      throw new IllegalStateException("Cluster state updates are done in a distributed way, should not try to access ZK queue");
    }
    return getStateUpdateQueue(new Stats());
  }

  /**
   * Separated into its own method from {@link #getStateUpdateQueue()} that does the same thing because this one is legit
   * to call even when cluster state updates are distributed whereas the other one is not.
   */
  ZkDistributedQueue getOverseerQuitNotificationQueue() {
    return getStateUpdateQueue(new Stats());
  }

@@ -1064,6 +1062,14 @@ public class Overseer implements SolrCloseable {
  }

  public void offerStateUpdate(byte[] data) throws KeeperException, InterruptedException {
    // When cluster state update is distributed, the Overseer cluster state update queue should only ever receive QUIT messages.
    // These go to sendQuitToOverseer for execution path clarity.
    if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
      final ZkNodeProps message = ZkNodeProps.load(data);
      final String operation = message.getStr(QUEUE_OPERATION);
      log.error("Received unexpected message on Overseer cluster state updater for " + operation + " when distributed updates are configured"); // nowarn
      throw new RuntimeException("Message " + operation + " offered to state update queue when distributed state update is configured.");
    }
    if (zkController.getZkClient().isClosed()) {
      throw new AlreadyClosedException();
    }

@@ -1080,7 +1086,16 @@ public class Overseer implements SolrCloseable {

  public interface Message {
    ClusterState run(ClusterState clusterState, Overseer overseer) throws Exception;

  }

  /**
   * This method enqueues a QUIT message to the overseer of the given id.
   * Effect is similar to building the message then calling {@link #offerStateUpdate}, but this method can legitimately be called
   * when cluster state update is distributed (and the Overseer cluster state updater is not really used) while {@link #offerStateUpdate} is not.
   * Note that sending "QUIT" to the overseer is not a cluster state update and was likely added to this queue because it was simpler.
   */
  public void sendQuitToOverseer(String overseerId) throws KeeperException, InterruptedException {
    getOverseerQuitNotificationQueue().offer(
        Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(), ID, overseerId)));
  }
}

@@ -20,9 +20,7 @@ import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.Map;

import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;

@@ -35,8 +33,6 @@ import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.params.CommonParams.ID;

/**
 * Responsible for prioritization of Overseer nodes, for example with the
 * ADDROLE collection command.

@@ -49,13 +45,16 @@ public class OverseerNodePrioritizer {
  private final String adminPath;
  private final ShardHandlerFactory shardHandlerFactory;

  private ZkDistributedQueue stateUpdateQueue;
  /**
   * Only used to send QUIT to the overseer
   */
  private final Overseer overseer;

  public OverseerNodePrioritizer(ZkStateReader zkStateReader, ZkDistributedQueue stateUpdateQueue, String adminPath, ShardHandlerFactory shardHandlerFactory) {
  public OverseerNodePrioritizer(ZkStateReader zkStateReader, Overseer overseer, String adminPath, ShardHandlerFactory shardHandlerFactory) {
    this.zkStateReader = zkStateReader;
    this.adminPath = adminPath;
    this.shardHandlerFactory = shardHandlerFactory;
    this.stateUpdateQueue = stateUpdateQueue;
    this.overseer = overseer;
  }

  public synchronized void prioritizeOverseerNodes(String overseerId) throws Exception {

@@ -95,10 +94,7 @@ public class OverseerNodePrioritizer {
      invokeOverseerOp(electionNodes.get(1), "rejoin"); //ask second in line to go behind
    }
    //now ask the current leader to QUIT, so that the designate can take over
    stateUpdateQueue.offer(
        Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
            ID, OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()))));

    overseer.sendQuitToOverseer(OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()));
  }

  private void invokeOverseerOp(String electionNode, String op) {

@@ -24,7 +24,6 @@ import org.apache.zookeeper.data.Stat;

/**
 * Refresh the Cluster State for a given collection
 *
 */
public class RefreshCollectionMessage implements Overseer.Message {
  public final String collection;

@@ -44,7 +43,7 @@ public class RefreshCollectionMessage implements Overseer.Message {
      //our state is up to date
      return clusterState;
    } else {
      coll = ZkStateReader.getCollectionLive(overseer.getZkStateReader(), collection);
      coll = overseer.getZkStateReader().getCollectionLive(collection);
      return clusterState.copyWith(collection, coll);
    }
  }
@@ -51,16 +51,17 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {

  private final CoreContainer cc;
  private final SyncStrategy syncStrategy;
  private final DistributedClusterStateUpdater distributedClusterStateUpdater;

  private volatile boolean isClosed = false;

  public ShardLeaderElectionContext(LeaderElector leaderElector,
                                    final String shardId, final String collection,
                                    final String coreNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) {
    super(leaderElector, shardId, collection, coreNodeName, props,
        zkController);
    super(leaderElector, shardId, collection, coreNodeName, props, zkController);
    this.cc = cc;
    syncStrategy = new SyncStrategy(cc);
    this.syncStrategy = new SyncStrategy(cc);
    this.distributedClusterStateUpdater = zkController.getDistributedClusterStateUpdater();
  }

  @Override

@@ -116,7 +117,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
      // Clear the leader in clusterstate. We only need to worry about this if there is actually more than one replica.
      ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(),
          ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection);
      zkController.getOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));

      if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
        distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceSetShardLeader, m,
            zkController.getSolrCloudManager(), zkStateReader);
      } else {
        zkController.getOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
      }
    }
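Editor's note: this if/else dispatch (apply the mutation synchronously through DistributedClusterStateUpdater, or serialize it onto the Overseer's state update queue) is the pattern repeated by nearly every call site touched in this commit. A minimal sketch of the pattern as a standalone helper, reusing the fields available in this class; the helper itself is hypothetical and not part of the commit, and the declared exception types are assumptions:

  private void applyStateUpdate(DistributedClusterStateUpdater.MutatingCommand command, ZkNodeProps message)
      throws KeeperException, InterruptedException {
    if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
      // Distributed mode: this thread mutates the collection's state.json itself, no Overseer involved.
      distributedClusterStateUpdater.doSingleStateUpdate(command, message,
          zkController.getSolrCloudManager(), zkStateReader);
    } else {
      // Overseer mode: enqueue the serialized message; the Overseer applies it asynchronously.
      zkController.getOverseer().getStateUpdateQueue().offer(Utils.toJSON(message));
    }
  }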

    if (!weAreReplacement) {

@@ -175,8 +175,13 @@ class ShardLeaderElectionContextBase extends ElectionContext {
      assert zkController != null;
      assert zkController.getOverseer() != null;
      DocCollection coll = zkStateReader.getCollection(this.collection);
      if (coll == null || ZkController.sendToOverseer(coll, id)) {
        zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));
      if (coll == null || ZkController.updateStateDotJson(coll, id)) {
        if (zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
          zkController.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceSetShardLeader, m,
              zkController.getSolrCloudManager(), zkStateReader);
        } else {
          zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));
        }
      } else {
        PerReplicaStates prs = PerReplicaStates.fetch(coll.getZNode(), zkClient, coll.getPerReplicaStates());
        PerReplicaStatesOps.flipLeader(zkStateReader.getClusterState().getCollection(collection).getSlice(shardId).getReplicaNames(), id, prs)

@ -196,6 +196,8 @@ public class ZkController implements Closeable {
|
|||
private final CloudConfig cloudConfig;
|
||||
private final NodesSysPropsCacher sysPropsCacher;
|
||||
|
||||
private final DistributedClusterStateUpdater distributedClusterStateUpdater;
|
||||
|
||||
private LeaderElector overseerElector;
|
||||
|
||||
private Map<String, ReplicateFromLeader> replicateFromLeaders = new ConcurrentHashMap<>();
|
||||
|
@ -285,6 +287,9 @@ public class ZkController implements Closeable {
|
|||
|
||||
this.cloudConfig = cloudConfig;
|
||||
|
||||
// Use the configured way to do cluster state update (Overseer queue vs distributed)
|
||||
distributedClusterStateUpdater = new DistributedClusterStateUpdater(cloudConfig.getDistributedClusterStateUpdates());
|
||||
|
||||
this.genericCoreNodeNames = cloudConfig.getGenericCoreNodeNames();
|
||||
|
||||
// be forgiving and strip this off leading/trailing slashes
|
||||
|
@ -464,7 +469,11 @@ public class ZkController implements Closeable {
|
|||
|
||||
init();
|
||||
|
||||
this.overseerJobQueue = overseer.getStateUpdateQueue();
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
this.overseerJobQueue = null;
|
||||
} else {
|
||||
this.overseerJobQueue = overseer.getStateUpdateQueue();
|
||||
}
|
||||
this.overseerCollectionQueue = overseer.getCollectionQueue(zkClient);
|
||||
this.overseerConfigSetQueue = overseer.getConfigSetQueue(zkClient);
|
||||
this.sysPropsCacher = new NodesSysPropsCacher(getSolrCloudManager().getNodeStateProvider(),
|
||||
|
@ -757,6 +766,10 @@ public class ZkController implements Closeable {
|
|||
return zkStateReader.getClusterState();
|
||||
}
|
||||
|
||||
public DistributedClusterStateUpdater getDistributedClusterStateUpdater() {
|
||||
return distributedClusterStateUpdater;
|
||||
}
|
||||
|
||||
public SolrCloudManager getSolrCloudManager() {
|
||||
if (cloudManager != null) {
|
||||
return cloudManager;
|
||||
|
@ -1533,7 +1546,7 @@ public class ZkController implements Closeable {
|
|||
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
|
||||
|
||||
Map<String,Object> props = new HashMap<>();
|
||||
props.put(Overseer.QUEUE_OPERATION, "state");
|
||||
props.put(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower());
|
||||
props.put(ZkStateReader.STATE_PROP, state.toString());
|
||||
props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
|
||||
props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
|
||||
|
@ -1585,8 +1598,13 @@ public class ZkController implements Closeable {
|
|||
cd.getCloudDescriptor().setLastPublished(state);
|
||||
}
|
||||
DocCollection coll = zkStateReader.getCollection(collection);
|
||||
if (forcePublish || sendToOverseer(coll, coreNodeName)) {
|
||||
overseerJobQueue.offer(Utils.toJSON(m));
|
||||
if (forcePublish || updateStateDotJson(coll, coreNodeName)) {
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ReplicaSetState, m,
|
||||
getSolrCloudManager(), zkStateReader);
|
||||
} else {
|
||||
overseerJobQueue.offer(Utils.toJSON(m));
|
||||
}
|
||||
} else {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("bypassed overseer for message : {}", Utils.toJSONString(m));
|
||||
|
@ -1601,9 +1619,9 @@ public class ZkController implements Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Whether a message needs to be sent to overseer or not
|
||||
* Returns {@code true} if a message needs to be sent to overseer (or done in a distributed way) to update state.json for the collection
|
||||
*/
|
||||
static boolean sendToOverseer(DocCollection coll, String replicaName) {
|
||||
static boolean updateStateDotJson(DocCollection coll, String replicaName) {
|
||||
if (coll == null) return true;
|
||||
if (!coll.isPerReplicaState()) return true;
|
||||
Replica r = coll.getReplica(replicaName);
|
||||
|
@ -1663,22 +1681,20 @@ public class ZkController implements Closeable {
|
|||
}
|
||||
CloudDescriptor cloudDescriptor = cd.getCloudDescriptor();
|
||||
if (removeCoreFromZk) {
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||
OverseerAction.DELETECORE.toLower(), ZkStateReader.CORE_NAME_PROP, coreName,
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
|
||||
ZkStateReader.CORE_NAME_PROP, coreName,
|
||||
ZkStateReader.NODE_NAME_PROP, getNodeName(),
|
||||
ZkStateReader.COLLECTION_PROP, cloudDescriptor.getCollectionName(),
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
|
||||
overseerJobQueue.offer(Utils.toJSON(m));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceRemoveReplica, m,
|
||||
getSolrCloudManager(), zkStateReader);
|
||||
} else {
|
||||
overseerJobQueue.offer(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void createCollection(String collection) throws Exception {
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||
CollectionParams.CollectionAction.CREATE.toLower(), ZkStateReader.NODE_NAME_PROP, getNodeName(),
|
||||
ZkStateReader.COLLECTION_PROP, collection);
|
||||
overseerJobQueue.offer(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
public ZkStateReader getZkStateReader() {
|
||||
return zkStateReader;
|
||||
}
|
||||
|
@ -2036,6 +2052,9 @@ public class ZkController implements Closeable {
|
|||
}
|
||||
|
||||
public ZkDistributedQueue getOverseerJobQueue() {
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
throw new IllegalStateException("Cluster is configured with distributed state update, not expecting the queue to be retrieved");
|
||||
}
|
||||
return overseerJobQueue;
|
||||
}

@@ -2632,17 +2651,26 @@ public class ZkController implements Closeable {
   */
  public void publishNodeAsDown(String nodeName) {
    log.info("Publish node={} as DOWN", nodeName);
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(),
        ZkStateReader.NODE_NAME_PROP, nodeName);
    try {
      overseer.getStateUpdateQueue().offer(Utils.toJSON(m));
    } catch (AlreadyClosedException e) {
      log.info("Not publishing node as DOWN because a resource required to do so is already closed.");
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      log.debug("Publish node as down was interrupted.");
    } catch (KeeperException e) {
      log.warn("Could not publish node as down: ", e);
    if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
      // Note that with the current implementation, when distributed cluster state updates are enabled, we mark the node
      // down synchronously from this thread, whereas the Overseer cluster state update frees this thread right away and
      // the Overseer will asynchronously mark the node down by updating all affected collections.
      // If this is an issue (i.e. takes too long), then the call below should be executed from another thread so that
      // the calling thread can immediately return.
      distributedClusterStateUpdater.executeNodeDownStateUpdate(nodeName, zkStateReader);
    } else {
      ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(),
          ZkStateReader.NODE_NAME_PROP, nodeName);
      try {
        overseer.getStateUpdateQueue().offer(Utils.toJSON(m));
      } catch (AlreadyClosedException e) {
        log.info("Not publishing node as DOWN because a resource required to do so is already closed.");
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        log.debug("Publish node as down was interrupted.");
      } catch (KeeperException e) {
        log.warn("Could not publish node as down: ", e);
      }
    }
  }
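Editor's note: the comment above keeps the distributed-mode node-down update synchronous and only suggests moving it off the calling thread if it ever proves too slow. A hedged sketch of that suggestion, not part of the commit; the executor type, thread name and lack of shutdown handling are illustrative assumptions:

      // Illustrative only: offload the synchronous state.json write so publishNodeAsDown() returns
      // immediately, matching the fire-and-forget behavior of the Overseer queue path.
      ExecutorService nodeDownExecutor = ExecutorUtil.newMDCAwareSingleThreadExecutor(
          new SolrNamedThreadFactory("nodeDownStateUpdate"));
      nodeDownExecutor.submit(() ->
          distributedClusterStateUpdater.executeNodeDownStateUpdate(nodeName, zkStateReader));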
@ -43,6 +43,7 @@ import java.util.stream.Collectors;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
|
||||
import org.apache.solr.cloud.ActiveReplicaWatcher;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ShardRequestTracker;
|
||||
import org.apache.solr.common.SolrCloseableLatch;
|
||||
|
@ -197,8 +198,6 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
|
||||
private ModifiableSolrParams getReplicaParams(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results, String collectionName, DocCollection coll, boolean skipCreateReplicaInClusterState, String asyncId, ShardHandler shardHandler, CreateReplica createReplica) throws IOException, InterruptedException, KeeperException {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
||||
ZkStateReader zkStateReader = ocmh.zkStateReader;
|
||||
if (!skipCreateReplicaInClusterState) {
|
||||
ZkNodeProps props = new ZkNodeProps(
|
||||
|
@ -212,12 +211,19 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
if (createReplica.coreNodeName != null) {
|
||||
props = props.plus(ZkStateReader.CORE_NODE_NAME_PROP, createReplica.coreNodeName);
|
||||
}
|
||||
try {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exception updating Overseer state queue", e);
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, props,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
try {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exception updating Overseer state queue", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CoreAdminParams.CORE_NODE_NAME,
|
||||
ocmh.waitToSeeReplicasInState(collectionName, Collections.singleton(createReplica.coreName)).get(createReplica.coreName).getName());
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ abstract class AliasCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
new LocalSolrQueryRequest(null, createReqParams),
|
||||
null,
|
||||
ocmh.overseer.getCoreContainer().getCollectionsHandler());
|
||||
createMsgMap.put(Overseer.QUEUE_OPERATION, "create");
|
||||
createMsgMap.put(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower());
|
||||
|
||||
NamedList results = new NamedList();
|
||||
try {
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.cloud.DistribStateManager;
|
|||
import org.apache.solr.client.solrj.cloud.NotEmptyException;
|
||||
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
|
||||
import org.apache.solr.client.solrj.cloud.VersionedData;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.RefreshCollectionMessage;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
|
@@ -145,17 +146,35 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd

    createCollectionZkNode(stateManager, collectionName, collectionParams);

    // Note that in code below there are two main execution paths: Overseer based cluster state updates and distributed
    // cluster state updates (look for isDistributedStateUpdate() conditions).
    //
    // PerReplicaStates (PRS) collections follow a hybrid approach. Even when the cluster is Overseer cluster state update based,
    // these collections are created locally then the cluster state updater is notified (look for usage of RefreshCollectionMessage).
    // This explains why PRS collections have less diverging execution paths between distributed or Overseer based cluster state updates.

    if (isPRS) {
      // In case of a PRS collection, create the collection structure directly instead of resubmitting
      // to the overseer queue.
      // TODO: Consider doing this for all collections, not just the PRS collections.
      // TODO comment above achieved by switching the cluster to distributed state updates

      // This code directly updates Zookeeper by creating the collection state.json. It is compatible with both distributed
      // cluster state updates and Overseer based cluster state updates.
      ZkWriteCommand command = new ClusterStateMutator(ocmh.cloudManager).createCollection(clusterState, message);
      byte[] data = Utils.toJSON(Collections.singletonMap(collectionName, command.collection));
      ocmh.zkStateReader.getZkClient().create(collectionPath, data, CreateMode.PERSISTENT, true);
      clusterState = clusterState.copyWith(collectionName, command.collection);
      newColl = command.collection;
    } else {
      ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
      if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
        // The message has been crafted by CollectionsHandler.CollectionOperation.CREATE_OP and defines the QUEUE_OPERATION
        // to be CollectionParams.CollectionAction.CREATE.
        ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ClusterCreateCollection, message,
            ocmh.cloudManager, ocmh.zkStateReader);
      } else {
        ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
      }

      // wait for a while until we see the collection
      TimeOut waitUntil = new TimeOut(30, TimeUnit.SECONDS, timeSource);

@@ -169,14 +188,13 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully create collection: " + collectionName);
      }

      // refresh cluster state
      // refresh cluster state (value read below comes from Zookeeper watch firing following the update done previously,
      // be it by Overseer or by this thread when updates are distributed)
      clusterState = ocmh.cloudManager.getClusterStateProvider().getClusterState();
      newColl = clusterState.getCollection(collectionName);

    }

    List<ReplicaPosition> replicaPositions = null;
    final List<ReplicaPosition> replicaPositions;
    try {
      replicaPositions = buildReplicaPositions(ocmh.overseer.getCoreContainer(), ocmh.cloudManager, clusterState, newColl,
          message, shardNames);

@@ -199,6 +217,17 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
    }
    Map<String,ShardRequest> coresToCreate = new LinkedHashMap<>();
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    final DistributedClusterStateUpdater.StateChangeRecorder scr;

    // PRS collections update Zookeeper directly, so even if we run in distributed state update,
    // there's nothing to update in state.json for such collection in the loop over replica positions below.
    if (!isPRS && ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      // The collection got created. Now we're adding replicas (and will update ZK only once when done adding).
      scr = ocmh.getDistributedClusterStateUpdater().createStateChangeRecorder(collectionName, false);
    } else {
      scr = null;
    }

    for (ReplicaPosition replicaPosition : replicaPositions) {
      String nodeName = replicaPosition.node;

@@ -226,6 +255,8 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
        // In case of a PRS collection, execute the ADDREPLICA directly instead of resubmitting
        // to the overseer queue.
        // TODO: Consider doing this for all collections, not just the PRS collections.

        // This PRS specific code is compatible with both Overseer and distributed cluster state update strategies
        ZkWriteCommand command = new SliceMutator(ocmh.cloudManager).addReplica(clusterState, props);
        byte[] data = Utils.toJSON(Collections.singletonMap(collectionName, command.collection));
        // log.info("collection updated : {}", new String(data, StandardCharsets.UTF_8));

@@ -233,7 +264,11 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
        clusterState = clusterState.copyWith(collectionName, command.collection);
        newColl = command.collection;
      } else {
        ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
        if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
          scr.record(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, props);
        } else {
          ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
        }
      }

      // Need to create new params for each request

@@ -266,14 +301,20 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
      coresToCreate.put(coreName, sreq);
    }

    // wait for all replica entries to be created
    Map<String, Replica> replicas ;
    // PRS collections did their own thing and we didn't create a StateChangeRecorder for them
    if (!isPRS && ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      // Add the replicas to the collection state (all at once after the loop above)
      scr.executeStateUpdates(ocmh.cloudManager, ocmh.zkStateReader);
    }
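Editor's note: taken together with the recorder set up before the replica loop and the scr.record(...) calls inside it, the batching flow for a non-PRS collection under distributed state updates looks roughly like the sketch below. It is assembled from the hunks above and runs in the context of this command (which declares throws Exception); replicaPropsList is a stand-in for the per-replica ZkNodeProps built inside the loop, not a name from the commit:

    // One recorder per collection creation: every SliceAddReplica mutation is accumulated...
    DistributedClusterStateUpdater.StateChangeRecorder recorder =
        ocmh.getDistributedClusterStateUpdater().createStateChangeRecorder(collectionName, false);
    for (ZkNodeProps replicaProps : replicaPropsList) {
      recorder.record(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, replicaProps);
    }
    // ...and written to the collection's state.json in a single update once the loop is done.
    recorder.executeStateUpdates(ocmh.cloudManager, ocmh.zkStateReader);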

    final Map<String, Replica> replicas;
    if (isPRS) {
      replicas = new ConcurrentHashMap<>();
      newColl.getSlices().stream().flatMap(slice -> slice.getReplicas().stream())
          .filter(r -> coresToCreate.containsKey(r.getCoreName())) // Only the elements that were asked for...
          .forEach(r -> replicas.putIfAbsent(r.getCoreName(), r)); // ...get added to the map
    } else {
      // wait for all replica entries to be created and visible in local cluster state (updated by ZK watches)
      replicas = ocmh.waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
    }

@@ -286,7 +327,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
      shardRequestTracker.processResponses(results, shardHandler, false, null, Collections.emptySet());
      @SuppressWarnings({"rawtypes"})
      boolean failure = results.get("failure") != null && ((SimpleOrderedMap)results.get("failure")).size() > 0;
      if(isPRS) {
      if (isPRS) {
        TimeOut timeout = new TimeOut(Integer.getInteger("solr.waitToSeeReplicasInStateTimeoutSeconds", 120), TimeUnit.SECONDS, timeSource); // could be a big cluster
        PerReplicaStates prs = PerReplicaStates.fetch(collectionPath, ocmh.zkStateReader.getZkClient(), null);
        while (!timeout.hasTimedOut()) {

@@ -299,9 +340,14 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
        } else {
          failure = true;
        }
        // Now ask Overseer to fetch the latest state of collection
        // from ZK
        ocmh.overseer.submit(new RefreshCollectionMessage(collectionName));
        // When cluster state updates are distributed, Overseer state updater is not used and doesn't have to be notified
        // of a new collection created elsewhere (which is how all collections are created).
        // Note it is likely possible to skip the whole if (isPRS) block, but keeping distributed state updates as
        // close in behavior to Overseer state updates for now.
        if (!ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
          // Now ask Overseer to fetch the latest state of collection from ZK
          ocmh.overseer.submit(new RefreshCollectionMessage(collectionName));
        }
      }
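Editor's note: for the Overseer-based path kept above, the submitted message is the RefreshCollectionMessage shown earlier in this commit. Its run(...) re-reads the collection that was written directly to ZooKeeper and folds it into the Overseer's in-memory ClusterState, roughly as below (simplified restatement of that hunk, shown here only to connect the two pieces):

        // Inside RefreshCollectionMessage.run(...): skip the read if the cached version is current,
        // otherwise reload this collection's state.json from ZooKeeper and merge it back in.
        DocCollection coll = overseer.getZkStateReader().getCollectionLive(collection);
        return clusterState.copyWith(collection, coll);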
      if (failure) {
        // Let's cleanup as we hit an exception

@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandles;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
|
@ -78,10 +79,17 @@ public class CreateShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NRT_REPLICAS + " + " + TLOG_REPLICAS + " must be greater than 0");
|
||||
}
|
||||
|
||||
//ZkStateReader zkStateReader = ocmh.zkStateReader;
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
|
||||
// wait for a while until we see the shard
|
||||
//ocmh.waitForNewShard(collectionName, sliceName);
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
// The message has been crafted by CollectionsHandler.CollectionOperation.CREATESHARD_OP and defines the QUEUE_OPERATION
|
||||
// to be CollectionParams.CollectionAction.CREATESHARD.
|
||||
// Likely a bug here (distributed or Overseer based) as we use the collection alias name and not the real name?
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionCreateShard, message,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
// message contains extCollectionName that might be an alias. Unclear (to me) how this works in that case.
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
|
||||
}
|
||||
|
||||
// wait for a while until we see the shard and update the local view of the cluster state
|
||||
clusterState = ocmh.waitForNewShard(collectionName, sliceName);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.Set;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.common.NonExistentCoreException;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -41,7 +42,6 @@ import org.apache.solr.common.cloud.ZkStateReader;
|
|||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.TimeSource;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.core.SolrInfoBean;
|
||||
import org.apache.solr.core.snapshots.SolrSnapshotManager;
|
||||
|
@ -63,11 +63,9 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd
|
|||
private static final Set<String> okayExceptions = Collections.singleton(NonExistentCoreException.class.getName());
|
||||
|
||||
private final OverseerCollectionMessageHandler ocmh;
|
||||
private final TimeSource timeSource;
|
||||
|
||||
public DeleteCollectionCmd(OverseerCollectionMessageHandler ocmh) {
|
||||
this.ocmh = ocmh;
|
||||
this.timeSource = ocmh.cloudManager.getTimeSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -150,7 +148,12 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd
|
|||
}
|
||||
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, collection);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ClusterDeleteCollection, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
// wait for a while until we don't see the collection
|
||||
zkStateReader.waitForState(collection, 60, TimeUnit.SECONDS, Objects::isNull);
|
||||
|
|
|
@ -34,6 +34,7 @@ import java.util.Map;
|
|||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -97,7 +98,19 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
// In this DeleteShardCmd.call() method there are potentially two cluster state updates. This is the first one.
|
||||
// Even though the code of this method does not wait for it to complete, it does call the Collection API before
|
||||
// it issues the second state change below. The collection API will be doing its own state change(s), and these will
|
||||
// happen after this one (given it's for the same collection). Therefore we persist this state change
|
||||
// immediately and do not group it with the one done further down.
|
||||
// Once the Collection API is also distributed (and not only the cluster state updates), we will likely be able
|
||||
// to batch more/all cluster state updates done by this command (DeleteShardCmd). TODO SOLR-15146
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
String asyncId = message.getStr(ASYNC);
|
||||
|
@ -144,7 +157,12 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP,
|
||||
collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
|
||||
ZkStateReader zkStateReader = ocmh.zkStateReader;
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionDeleteShard, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
zkStateReader.waitForState(collectionName, 45, TimeUnit.SECONDS, (c) -> c.getSlice(sliceId) == null);
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Map;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ShardRequestTracker;
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
|
@ -201,7 +202,12 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
"targetCollection", targetCollection.getName(),
|
||||
"expireAt", RoutingRule.makeExpiryAt(timeout));
|
||||
log.info("Adding routing rule: {}", m);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceAddRoutingRule, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
// wait for a while until we see the new rule
|
||||
log.info("Waiting to see routing rule updated in clusterstate");
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
|||
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.LockTree;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.OverseerMessageHandler;
|
||||
|
@ -149,6 +150,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
String myId;
|
||||
Stats stats;
|
||||
TimeSource timeSource;
|
||||
private final DistributedClusterStateUpdater distributedClusterStateUpdater;
|
||||
|
||||
// Set that tracks collections that are currently being processed by a running task.
|
||||
// This is used for handling mutual exclusion of the tasks.
|
||||
|
@ -186,6 +188,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
this.myId = myId;
|
||||
this.stats = stats;
|
||||
this.overseer = overseer;
|
||||
this.distributedClusterStateUpdater = overseer.getDistributedClusterStateUpdater();
|
||||
this.cloudManager = overseer.getSolrCloudManager();
|
||||
this.timeSource = cloudManager.getTimeSource();
|
||||
this.isClosed = false;
|
||||
|
@ -267,6 +270,10 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
return new OverseerSolrResponse(results);
|
||||
}
|
||||
|
||||
DistributedClusterStateUpdater getDistributedClusterStateUpdater() {
|
||||
return distributedClusterStateUpdater;
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "Needs currentTimeMillis for mock requests")
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private void mockOperation(ClusterState state, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results) throws InterruptedException {
|
||||
|
@ -328,23 +335,31 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
private void processReplicaAddPropertyCommand(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results)
|
||||
throws Exception {
|
||||
checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP, PROPERTY_VALUE_PROP);
|
||||
SolrZkClient zkClient = zkStateReader.getZkClient();
|
||||
Map<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICAPROP.toLower());
|
||||
propMap.putAll(message.getProperties());
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ReplicaAddReplicaProperty, m,
|
||||
cloudManager, zkStateReader);
|
||||
} else {
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
private void processReplicaDeletePropertyCommand(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results)
|
||||
throws Exception {
|
||||
checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP);
|
||||
SolrZkClient zkClient = zkStateReader.getZkClient();
|
||||
Map<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(Overseer.QUEUE_OPERATION, DELETEREPLICAPROP.toLower());
|
||||
propMap.putAll(message.getProperties());
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ReplicaDeleteReplicaProperty, m,
|
||||
cloudManager, zkStateReader);
|
||||
} else {
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
private void balanceProperty(ClusterState clusterState, ZkNodeProps message, @SuppressWarnings({"rawtypes"})NamedList results) throws Exception {
|
||||
|
@ -353,11 +368,15 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
"The '" + COLLECTION_PROP + "' and '" + PROPERTY_PROP +
|
||||
"' parameters are required for the BALANCESHARDUNIQUE operation, no action taken");
|
||||
}
|
||||
SolrZkClient zkClient = zkStateReader.getZkClient();
|
||||
Map<String, Object> m = new HashMap<>();
|
||||
m.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower());
|
||||
m.putAll(message.getProperties());
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.BalanceShardsUnique, new ZkNodeProps(m),
|
||||
cloudManager, zkStateReader);
|
||||
} else {
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -424,7 +443,12 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
ZkStateReader.NODE_NAME_PROP, replica.getNodeName(),
|
||||
ZkStateReader.COLLECTION_PROP, collectionName,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, replicaName);
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceRemoveReplica, m,
|
||||
cloudManager, zkStateReader);
|
||||
} else {
|
||||
overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
void checkRequired(ZkNodeProps message, String... props) {
|
||||
|
@ -554,7 +578,13 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
|
|||
reloadCollection(null, new ZkNodeProps(NAME, collectionName), results);
|
||||
}
|
||||
|
||||
overseer.offerStateUpdate(Utils.toJSON(message));
|
||||
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
|
||||
// Apply the state update right away. The wait will still be useful for the change to be visible in the local cluster state (watchers have fired).
|
||||
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, message,
|
||||
cloudManager, zkStateReader);
|
||||
} else {
|
||||
overseer.offerStateUpdate(Utils.toJSON(message));
|
||||
}
|
||||
|
||||
try {
|
||||
zkStateReader.waitForState(collectionName, 30, TimeUnit.SECONDS, c -> {
|
||||
|
|
|
@ -60,6 +60,16 @@ public class OverseerStatusCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
zkStateReader.getZkClient().getData("/overseer/collection-queue-work",null, stat, true);
|
||||
results.add("overseer_collection_queue_size", stat.getNumChildren());
|
||||
|
||||
// Overseer reported stats below are tracked in the Overseer cluster state updater when it performs certain operations.
|
||||
// Sharing the ocmh.stats variable between the cluster state updater and the Collection API (this command) is by the way
|
||||
// about the only thing that ties the cluster state updater to the collection api message handler and that takes
|
||||
// advantage of the fact that both run on the same node (the Overseer node). (recently added PerReplicaStates also
|
||||
// take advantage of this through method Overseer.submit()).
|
||||
// When distributed updates are enabled, cluster state updates are not done by the Overseer (it doesn't even see them)
|
||||
// and therefore can't report them. The corresponding data in OVERSEERSTATUS (all data built below) is no longer returned.
|
||||
// This means top level keys "overseer_operations", "collection_operations", "overseer_queue", "overseer_internal_queue"
|
||||
// and "collection_queue" are either empty or do not contain all expected information when cluster state updates are distributed.
|
||||
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
NamedList overseerStats = new NamedList();
|
||||
@SuppressWarnings({"rawtypes"})
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
|||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
|
@ -370,7 +371,12 @@ public class ReindexCollectionCmd implements OverseerCollectionMessageHandler.Cm
|
|||
Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower(),
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.READ_ONLY, "true");
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(cmd));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, cmd,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(cmd));
|
||||
}
|
||||
|
||||
TestInjection.injectReindexLatch();
|
||||
|
||||
|
@ -477,14 +483,24 @@ public class ReindexCollectionCmd implements OverseerCollectionMessageHandler.Cm
|
|||
Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower(),
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.READ_ONLY, null);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, props,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
}
|
||||
}
|
||||
// 9. set FINISHED state on the target and clear the state on the source
|
||||
ZkNodeProps props = new ZkNodeProps(
|
||||
Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower(),
|
||||
ZkStateReader.COLLECTION_PROP, targetCollection,
|
||||
REINDEXING_STATE, State.FINISHED.toLower());
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, props,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
}
|
||||
|
||||
reindexingState.put(STATE, State.FINISHED.toLower());
|
||||
reindexingState.put(PHASE, "done");
|
||||
|
@ -804,7 +820,12 @@ public class ReindexCollectionCmd implements OverseerCollectionMessageHandler.Cm
|
|||
Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.MODIFYCOLLECTION.toLower(),
|
||||
ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.READ_ONLY, null);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionModifyCollection, props,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
}
|
||||
removeReindexingState(collection);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,26 @@
|
|||
|
||||
package org.apache.solr.cloud.api.collections;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ShardRequestTracker;
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
|
@ -48,25 +67,6 @@ import org.apache.zookeeper.KeeperException;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.*;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
|
||||
|
@ -343,7 +343,12 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString());
|
||||
}
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollection.getName());
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, new ZkNodeProps(propMap),
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
}
|
||||
}
|
||||
|
||||
private List<ReplicaPosition> getReplicaPositions(DocCollection restoreCollection, List<String> nodeList, ClusterState clusterState, List<String> sliceNames) throws IOException, InterruptedException {
|
||||
|
@ -461,7 +466,12 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
for (Slice shard : restoreCollection.getSlices()) {
|
||||
propMap.put(shard.getName(), Slice.State.ACTIVE.toString());
|
||||
}
|
||||
ocmh.overseer.offerStateUpdate((Utils.toJSON(new ZkNodeProps(propMap))));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, new ZkNodeProps(propMap),
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
}
|
||||
}
|
||||
|
||||
private void addReplicasToShards(@SuppressWarnings({"rawtypes"}) NamedList results,
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.solr.client.solrj.cloud.NodeStateProvider;
|
|||
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
|
||||
import org.apache.solr.client.solrj.cloud.VersionedData;
|
||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||
import org.apache.solr.cloud.DistributedClusterStateUpdater;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler.ShardRequestTracker;
|
||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||
|
@ -245,7 +246,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
// delete the shards
|
||||
log.info("Sub-shard: {} already exists therefore requesting its deletion", subSlice);
|
||||
Map<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
|
||||
propMap.put(Overseer.QUEUE_OPERATION, DELETESHARD.toLower());
|
||||
propMap.put(COLLECTION_PROP, collectionName);
|
||||
propMap.put(SHARD_ID_PROP, subSlice);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
|
@ -288,7 +289,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
propMap.put("shard_parent_node", nodeName);
|
||||
propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
|
||||
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.CollectionCreateShard, new ZkNodeProps(propMap),
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
|
||||
}
|
||||
|
||||
// wait until we are able to see the new shard in cluster state and refresh the local view of the cluster state
|
||||
clusterState = ocmh.waitForNewShard(collectionName, subSlice);
|
||||
|
@ -442,6 +448,13 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
t.stop();
|
||||
|
||||
t = timings.sub("createReplicaPlaceholders");
|
||||
final DistributedClusterStateUpdater.StateChangeRecorder scr;
|
||||
boolean hasRecordedDistributedUpdate = false;
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
scr = ocmh.getDistributedClusterStateUpdater().createStateChangeRecorder(collectionName, false);
|
||||
} else {
|
||||
scr = null;
|
||||
}
|
||||
for (ReplicaPosition replicaPosition : replicaPositions) {
|
||||
String sliceName = replicaPosition.shard;
|
||||
String subShardNodeName = replicaPosition.node;
|
||||
|
@ -462,7 +475,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(),
|
||||
ZkStateReader.NODE_NAME_PROP, subShardNodeName,
|
||||
CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
hasRecordedDistributedUpdate = true;
|
||||
scr.record(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, props);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
|
||||
}
|
||||
|
||||
HashMap<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
|
||||
|
@ -488,6 +506,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
|
||||
replicas.add(propMap);
|
||||
}
|
||||
if (hasRecordedDistributedUpdate && ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
// Actually add the replicas to the collection state. Note that when Overseer takes care of the state,
|
||||
// there is no wait here for the state update to be visible, but with distributed state update done synchronously
|
||||
// we wait (we could in theory create a thread and have it do the work if we REALLY needed, but we likely don't).
|
||||
scr.executeStateUpdates(ocmh.cloudManager, ocmh.zkStateReader);
|
||||
}
|
||||
t.stop();
|
||||
assert TestInjection.injectSplitFailureBeforeReplicaCreation();
|
||||
|
||||
|
@ -504,12 +528,17 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
if (leaderZnodeStat == null) {
|
||||
// the leader is not live anymore, fail the split!
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
|
||||
} else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
|
||||
} else {
|
||||
// there's a new leader, fail the split!
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from "
|
||||
|
@ -538,7 +567,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
} else {
|
||||
log.info("Requesting shard state be set to 'recovery'");
|
||||
Map<String, Object> propMap = new HashMap<>();
|
||||
|
@ -548,7 +582,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
}
|
||||
|
||||
t = timings.sub("createCoresForReplicas");
|
||||
|
@ -709,7 +748,12 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
if (sendUpdateState) {
|
||||
try {
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
if (ocmh.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
ocmh.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
ocmh.cloudManager, ocmh.zkStateReader);
|
||||
} else {
|
||||
ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// don't give up yet - just log the error, we may still be able to clean up
|
||||
log.warn("Cleanup failed after failed split of {}/{}: (slice state changes)", collectionName, parentShard, e);
|
||||
|
@ -724,7 +768,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
|
|||
}
|
||||
log.debug("- sub-shard: {} exists therefore requesting its deletion", subSlice);
|
||||
HashMap<String, Object> props = new HashMap<>();
|
||||
props.put(Overseer.QUEUE_OPERATION, "deleteshard");
|
||||
props.put(Overseer.QUEUE_OPERATION, DELETESHARD.toLower());
|
||||
props.put(COLLECTION_PROP, collectionName);
|
||||
props.put(SHARD_ID_PROP, subSlice);
|
||||
ZkNodeProps m = new ZkNodeProps(props);
|
||||
|
|
|
@@ -26,14 +26,12 @@ import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.*;
import org.apache.solr.common.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
import static org.apache.solr.common.params.CommonParams.NAME;

public class CollectionMutator {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

@@ -155,24 +153,9 @@ public class CollectionMutator {
}

public static DocCollection updateSlice(String collectionName, DocCollection collection, Slice slice) {
DocCollection newCollection = null;
Map<String, Slice> slices;

if (collection == null) {
// when updateSlice is called on a collection that doesn't exist, it's currently when a core is publishing itself
// without explicitly creating a collection. In this current case, we assume custom sharding with an "implicit" router.
slices = new LinkedHashMap<>(1);
slices.put(slice.getName(), slice);
Map<String, Object> props = new HashMap<>(1);
props.put(DocCollection.DOC_ROUTER, Utils.makeMap(NAME, ImplicitDocRouter.NAME));
newCollection = new DocCollection(collectionName, slices, props, new ImplicitDocRouter());
} else {
slices = new LinkedHashMap<>(collection.getSlicesMap()); // make a shallow copy
slices.put(slice.getName(), slice);
newCollection = collection.copyWithSlices(slices);
}

return newCollection;
Map<String, Slice> slices = new LinkedHashMap<>(collection.getSlicesMap()); // make a shallow copy
slices.put(slice.getName(), slice);
return collection.copyWithSlices(slices);
}

static boolean checkCollectionKeyExistence(ZkNodeProps message) {

@@ -18,6 +18,7 @@ package org.apache.solr.cloud.overseer;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Optional;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;

@@ -39,57 +40,74 @@ public class NodeMutator {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

public List<ZkWriteCommand> downNode(ClusterState clusterState, ZkNodeProps message) {
List<ZkWriteCommand> zkWriteCommands = new ArrayList<>();
String nodeName = message.getStr(ZkStateReader.NODE_NAME_PROP);

log.debug("DownNode state invoked for node: {}", nodeName);

List<ZkWriteCommand> zkWriteCommands = new ArrayList<>();

Map<String, DocCollection> collections = clusterState.getCollectionsMap();
for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
List<String> downedReplicas = new ArrayList<>();

String collection = entry.getKey();
String collectionName = entry.getKey();
DocCollection docCollection = entry.getValue();

Map<String,Slice> slicesCopy = new LinkedHashMap<>(docCollection.getSlicesMap());
Optional<ZkWriteCommand> zkWriteCommand = computeCollectionUpdate(nodeName, collectionName, docCollection);

boolean needToUpdateCollection = false;
for (Entry<String, Slice> sliceEntry : slicesCopy.entrySet()) {
Slice slice = sliceEntry.getValue();
Map<String, Replica> newReplicas = slice.getReplicasCopy();

Collection<Replica> replicas = slice.getReplicas();
for (Replica replica : replicas) {
String rNodeName = replica.getNodeName();
if (rNodeName == null) {
throw new RuntimeException("Replica without node name! " + replica);
}
if (rNodeName.equals(nodeName)) {
log.debug("Update replica state for {} to {}", replica, Replica.State.DOWN);
Map<String, Object> props = replica.shallowCopy();
Replica newReplica = new Replica(replica.getName(), replica.node, replica.collection, slice.getName(), replica.core,
Replica.State.DOWN, replica.type, props);
newReplicas.put(replica.getName(), newReplica);
needToUpdateCollection = true;
downedReplicas.add(replica.getName());
}
}

Slice newSlice = new Slice(slice.getName(), newReplicas, slice.shallowCopy(),collection);
slicesCopy.put(slice.getName(), newSlice);
}

if (needToUpdateCollection) {
if (docCollection.isPerReplicaState()) {
zkWriteCommands.add(new ZkWriteCommand(collection, docCollection.copyWithSlices(slicesCopy),
PerReplicaStatesOps.downReplicas(downedReplicas, docCollection.getPerReplicaStates()), false));
} else {
zkWriteCommands.add(new ZkWriteCommand(collection, docCollection.copyWithSlices(slicesCopy)));
}
if (zkWriteCommand.isPresent()) {
zkWriteCommands.add(zkWriteCommand.get());
}
}

return zkWriteCommands;
}

/**
* Returns the write command needed to update the replicas of a given collection given the identity of a node being down.
* @return An optional with the write command or an empty one if the collection does not need any state modification.
* The returned write command might be for per replica state updates or for an update to state.json, depending on the
* configuration of the collection.
*/
public static Optional<ZkWriteCommand> computeCollectionUpdate(String nodeName, String collectionName, DocCollection docCollection) {
boolean needToUpdateCollection = false;
List<String> downedReplicas = new ArrayList<>();
Map<String,Slice> slicesCopy = new LinkedHashMap<>(docCollection.getSlicesMap());

for (Entry<String, Slice> sliceEntry : slicesCopy.entrySet()) {
Slice slice = sliceEntry.getValue();
Map<String, Replica> newReplicas = slice.getReplicasCopy();

Collection<Replica> replicas = slice.getReplicas();
for (Replica replica : replicas) {
String rNodeName = replica.getNodeName();
if (rNodeName == null) {
throw new RuntimeException("Replica without node name! " + replica);
}
if (rNodeName.equals(nodeName)) {
log.debug("Update replica state for {} to {}", replica, Replica.State.DOWN);
Map<String, Object> props = replica.shallowCopy();
Replica newReplica = new Replica(replica.getName(), replica.node, replica.collection, slice.getName(), replica.core,
Replica.State.DOWN, replica.type, props);
newReplicas.put(replica.getName(), newReplica);
needToUpdateCollection = true;
downedReplicas.add(replica.getName());
}
}

Slice newSlice = new Slice(slice.getName(), newReplicas, slice.shallowCopy(),collectionName);
slicesCopy.put(slice.getName(), newSlice);
}

if (needToUpdateCollection) {
if (docCollection.isPerReplicaState()) {
return Optional.of(new ZkWriteCommand(collectionName, docCollection.copyWithSlices(slicesCopy),
PerReplicaStatesOps.downReplicas(downedReplicas, docCollection.getPerReplicaStates()), false));
} else {
return Optional.of(new ZkWriteCommand(collectionName, docCollection.copyWithSlices(slicesCopy)));
}
} else {
// No update needed for this collection
return Optional.empty();
}
}
}

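A minimal sketch, not part of the patch, of how the new static NodeMutator.computeCollectionUpdate contract can be consumed; the helper method and its arguments are hypothetical, and an empty Optional means the collection holds no replica on the downed node.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.solr.cloud.overseer.NodeMutator;
import org.apache.solr.cloud.overseer.ZkWriteCommand;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;

// Gather the per-collection write commands produced when a node goes down.
static List<ZkWriteCommand> commandsForDownedNode(ClusterState clusterState, String nodeName) {
  List<ZkWriteCommand> commands = new ArrayList<>();
  for (Map.Entry<String, DocCollection> entry : clusterState.getCollectionsMap().entrySet()) {
    // An empty Optional means this collection needs no state change for the downed node.
    NodeMutator.computeCollectionUpdate(nodeName, entry.getKey(), entry.getValue())
        .ifPresent(commands::add);
  }
  return commands;
}
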
@@ -229,7 +229,7 @@ public class SliceMutator {
DocCollection collection = clusterState.getCollection(collectionName);
Slice slice = collection.getSlice(shard);
if (slice == null) {
throw new RuntimeException("Overseer.addRoutingRule unknown collection: " + collectionName + " slice:" + shard);
throw new RuntimeException("Overseer.addRoutingRule collection: " + collectionName + ", unknown slice: " + shard);
}

Map<String, RoutingRule> routingRules = slice.getRoutingRules();

@@ -28,6 +28,7 @@ import org.apache.solr.cloud.Stats;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.PerReplicaStates;
import org.apache.solr.common.cloud.PerReplicaStatesOps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.CreateMode;

@@ -59,7 +60,7 @@ public class ZkStateWriter {
/**
* Represents a no-op {@link ZkWriteCommand} which will result in no modification to cluster state
*/
public static ZkWriteCommand NO_OP = ZkWriteCommand.noop();
public static ZkWriteCommand NO_OP = ZkWriteCommand.NO_OP;

protected final ZkStateReader reader;
protected final Stats stats;

@@ -217,14 +218,17 @@ public class ZkStateWriter {
ZkWriteCommand cmd = entry.getValue();
DocCollection c = cmd.collection;

if(cmd.ops != null && cmd.ops.isPreOp()) {
// Update the Per Replica State znodes if needed
if (cmd.ops != null) {
cmd.ops.persist(path, reader.getZkClient());
clusterState = clusterState.copyWith(name,
cmd.collection.copyWith(PerReplicaStates.fetch(cmd.collection.getZNode(), reader.getZkClient(), null)));
}
if (!cmd.persistCollState) continue;

// Update the state.json file if needed
if (!cmd.persistJsonState) continue;
if (c == null) {
// let's clean up the state.json of this collection only, the rest should be clean by delete collection cmd
// let's clean up the state.json of this collection only, the rest should be cleaned by delete collection cmd
log.debug("going to delete state.json {}", path);
reader.getZkClient().clean(path);
} else {

@@ -243,13 +247,18 @@ public class ZkStateWriter {
clusterState = clusterState.copyWith(name, newCollection);
}
}
if(cmd.ops != null && !cmd.ops.isPreOp()) {
cmd.ops.persist(path, reader.getZkClient());
DocCollection currentCollState = clusterState.getCollection(cmd.name);
if ( currentCollState != null) {
clusterState = clusterState.copyWith(name,
currentCollState.copyWith(PerReplicaStates.fetch(currentCollState.getZNode(), reader.getZkClient(), null)));
}

// When dealing with a per replica collection that did not do any update to the per replica states znodes but did
// update state.json, we add then remove a dummy node to change the cversion of the parent znode.
// This is not needed by Solr, there's no code watching the children and not watching the state.json node itself.
// It would be useful for external code watching the collection's Zookeeper state.json node children but not the node itself.
if (cmd.ops == null && cmd.isPerReplicaStateCollection) {
PerReplicaStatesOps.touchChildren().persist(path, reader.getZkClient());
DocCollection currentCollState = clusterState.getCollection(cmd.name);
if (currentCollState != null) {
clusterState = clusterState.copyWith(name,
currentCollState.copyWith(PerReplicaStates.fetch(currentCollState.getZNode(), reader.getZkClient(), null)));
}
}
}

@@ -20,51 +20,34 @@ import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.PerReplicaStatesOps;

public class ZkWriteCommand {
/**
* Single NO_OP instance, can be compared with ==
*/
static final ZkWriteCommand NO_OP = new ZkWriteCommand(null, null);

public final String name;
public final DocCollection collection;
public final boolean isPerReplicaStateCollection;

public final boolean noop;
// persist the collection state. If this is false, it means the collection state is not modified
public final boolean persistCollState;
public final boolean persistJsonState;
public final PerReplicaStatesOps ops;

public ZkWriteCommand(String name, DocCollection collection, PerReplicaStatesOps replicaOps, boolean persistCollState) {
boolean isPerReplicaState = collection.isPerReplicaState();
public ZkWriteCommand(String name, DocCollection collection, PerReplicaStatesOps replicaOps, boolean persistJsonState) {
isPerReplicaStateCollection = collection != null && collection.isPerReplicaState();
this.name = name;
this.collection = collection;
this.noop = false;
this.ops = isPerReplicaState ? replicaOps : null;
this.persistCollState = isPerReplicaState ? persistCollState : true;
this.ops = replicaOps;
this.persistJsonState = persistJsonState || !isPerReplicaStateCollection; // Always persist for non "per replica state" collections
}

public ZkWriteCommand(String name, DocCollection collection) {
this.name = name;
this.collection = collection;
this.noop = false;
persistCollState = true;
this.ops = collection != null && collection.isPerReplicaState() ?
PerReplicaStatesOps.touchChildren():
null;
}

/**
* Returns a no-op
*/
protected ZkWriteCommand() {
this.noop = true;
this.name = null;
this.collection = null;
this.ops = null;
persistCollState = true;
}

public static ZkWriteCommand noop() {
return new ZkWriteCommand();
this(name, collection, null, true);
}

@Override
public String toString() {
return getClass().getSimpleName() + ": " + (noop ? "no-op" : name + "=" + collection);
return getClass().getSimpleName() + ": " + (this == NO_OP ? "no-op" : name + "=" + collection);
}
}

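An illustrative sketch, not from the patch, of the command shapes a mutator can now produce; collectionName, updatedCollection and replicaOps are assumed to be in scope (replicaOps would typically come from PerReplicaStatesOps.downReplicas as in NodeMutator above). For a per replica state collection, state.json is written only when the persistJsonState argument is true; for any other collection it is always written.

// Rewrite state.json for the collection (the usual, non "per replica state" path).
ZkWriteCommand jsonUpdate = new ZkWriteCommand(collectionName, updatedCollection);
// Per replica state update only: apply replicaOps and pass false so state.json itself is left alone.
ZkWriteCommand prsOnly = new ZkWriteCommand(collectionName, updatedCollection, replicaOps, false);
// Nothing to change: return the shared constant, which callers may compare with ==.
ZkWriteCommand unchanged = ZkStateWriter.NO_OP;
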
@@ -48,10 +48,12 @@ public class CloudConfig {

private final String pkiHandlerPublicKeyPath;

private final boolean useDistributedClusterStateUpdates;

CloudConfig(String zkHost, int zkClientTimeout, int hostPort, String hostName, String hostContext, boolean useGenericCoreNames,
int leaderVoteWait, int leaderConflictResolveWait, String zkCredentialsProviderClass, String zkACLProviderClass,
int createCollectionWaitTimeTillActive, boolean createCollectionCheckLeaderActive, String pkiHandlerPrivateKeyPath,
String pkiHandlerPublicKeyPath) {
String pkiHandlerPublicKeyPath, boolean useDistributedClusterStateUpdates) {
this.zkHost = zkHost;
this.zkClientTimeout = zkClientTimeout;
this.hostPort = hostPort;

@@ -66,6 +68,7 @@ public class CloudConfig {
this.createCollectionCheckLeaderActive = createCollectionCheckLeaderActive;
this.pkiHandlerPrivateKeyPath = pkiHandlerPrivateKeyPath;
this.pkiHandlerPublicKeyPath = pkiHandlerPublicKeyPath;
this.useDistributedClusterStateUpdates = useDistributedClusterStateUpdates;

if (this.hostPort == -1)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "'hostPort' must be configured to run SolrCloud");

@@ -129,6 +132,10 @@ public class CloudConfig {
return pkiHandlerPublicKeyPath;
}

public boolean getDistributedClusterStateUpdates() {
return useDistributedClusterStateUpdates;
}

public static class CloudConfigBuilder {

private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 45000;

@@ -151,6 +158,7 @@ public class CloudConfig {
private boolean createCollectionCheckLeaderActive = DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE;
private String pkiHandlerPrivateKeyPath;
private String pkiHandlerPublicKeyPath;
private boolean useDistributedClusterStateUpdates = false;

public CloudConfigBuilder(String hostName, int hostPort) {
this(hostName, hostPort, null);

@@ -217,10 +225,15 @@ public class CloudConfig {
return this;
}

public CloudConfigBuilder setUseDistributedClusterStateUpdates(boolean useDistributedClusterStateUpdates) {
this.useDistributedClusterStateUpdates = useDistributedClusterStateUpdates;
return this;
}

public CloudConfig build() {
return new CloudConfig(zkHost, zkClientTimeout, hostPort, hostName, hostContext, useGenericCoreNames, leaderVoteWait,
leaderConflictResolveWait, zkCredentialsProviderClass, zkACLProviderClass, createCollectionWaitTimeTillActive,
createCollectionCheckLeaderActive, pkiHandlerPrivateKeyPath, pkiHandlerPublicKeyPath);
createCollectionCheckLeaderActive, pkiHandlerPrivateKeyPath, pkiHandlerPublicKeyPath, useDistributedClusterStateUpdates);
}
}
}

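A short, hypothetical usage sketch of the new builder option (not part of the patch); the host name and port are placeholders.

// Build a CloudConfig that opts this node into distributed cluster state updates.
CloudConfig config = new CloudConfig.CloudConfigBuilder("solr-node-1", 8983)
    .setUseDistributedClusterStateUpdates(true)
    .build();
assert config.getDistributedClusterStateUpdates();
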
@@ -485,6 +485,9 @@ public class SolrXmlConfig {
case "pkiHandlerPublicKeyPath":
builder.setPkiHandlerPublicKeyPath(value);
break;
case "distributedClusterStateUpdates":
builder.setUseDistributedClusterStateUpdates(Boolean.parseBoolean(value));
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown configuration parameter in <solrcloud> section of solr.xml: " + name);
}

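The flag is read from the <solrcloud> section of solr.xml; below is a minimal sketch of enabling it, written as a Java string constant in the same style as the CreateCollectionCleanupTest fixture further down. The constant name is hypothetical; the system-property fallback and false default mirror that test.

// Fragment of a solr.xml <solrcloud> section enabling distributed cluster state updates.
private static final String DISTRIBUTED_UPDATES_SNIPPET =
    "  <solrcloud>\n" +
    "    <str name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str>\n" +
    "  </solrcloud>\n";
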
@@ -28,6 +28,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
import org.apache.solr.cloud.DistributedClusterStateUpdater;
import org.apache.solr.cloud.OverseerSolrResponse;
import org.apache.solr.cloud.OverseerSolrResponseSerializer;
import org.apache.solr.cloud.OverseerTaskQueue;

@@ -125,13 +126,12 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission

protected final CoreContainer coreContainer;
private final CollectionHandlerApi v2Handler;
private final DistributedClusterStateUpdater distributedClusterStateUpdater;

public CollectionsHandler() {
super();
// Unlike most request handlers, CoreContainer initialization
// should happen in the constructor...
this.coreContainer = null;
v2Handler = new CollectionHandlerApi(this);
this(null);
}

@@ -143,6 +143,17 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
public CollectionsHandler(final CoreContainer coreContainer) {
this.coreContainer = coreContainer;
v2Handler = new CollectionHandlerApi(this);
// Get the state change factory to know if need to enqueue to Overseer or process distributed.
// Some SolrCloud tests do not need Zookeeper and end up with a null cloudConfig in NodeConfig (because
// TestHarness.buildTestNodeConfig() uses the zkHost to decide it's SolrCloud).
// These tests do not use Zookeeper and do not do state updates (see subclasses of TestBaseStatsCacheCloud).
// Some non SolrCloud tests do not even pass a config at all, so let be cautious here (code is not pretty).
// We do want to initialize here and not do it lazy to not deal with synchronization for actual prod code.
if (coreContainer == null || coreContainer.getConfig() == null || coreContainer.getConfig().getCloudConfig() == null) {
distributedClusterStateUpdater = null;
} else {
distributedClusterStateUpdater = new DistributedClusterStateUpdater(coreContainer.getConfig().getCloudConfig().getDistributedClusterStateUpdates());
}
}

@Override

@@ -246,7 +257,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
rsp.setException(exp);
}

//TODO yuck; shouldn't create-collection at the overseer do this? (conditionally perhaps)
// Even if Overseer does wait for the collection to be created, it sees a different cluster state than this node,
// so this wait is required to make sure the local node Zookeeper watches fired and now see the collection.
if (action.equals(CollectionAction.CREATE) && asyncId == null) {
if (rsp.getException() == null) {
waitForActiveCollection(zkProps.getStr(NAME), cores, overseerResponse);

@@ -254,8 +266,16 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
}

} else {
// submits and doesn't wait for anything (no response)
coreContainer.getZkController().getOverseer().offerStateUpdate(Utils.toJSON(props));
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
DistributedClusterStateUpdater.MutatingCommand command = DistributedClusterStateUpdater.MutatingCommand.getCommandFor(operation.action);
ZkNodeProps message = new ZkNodeProps(props);
// We do the state change synchronously but do not wait for it to be visible in this node's cluster state updated via ZK watches
distributedClusterStateUpdater.doSingleStateUpdate(command, message,
coreContainer.getZkController().getSolrCloudManager(), coreContainer.getZkController().getZkStateReader());
} else {
// submits and doesn't wait for anything (no response)
coreContainer.getZkController().getOverseer().offerStateUpdate(Utils.toJSON(props));
}
}

}

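The handler above, like the other call sites touched by this commit, follows one dispatch pattern: apply the mutation synchronously through the distributed updater, or enqueue it for the Overseer. The helper below is an illustrative sketch of that pattern, not code from the patch; the method name and its parameter list are assumptions.

// Apply a cluster state mutation either synchronously through the distributed updater,
// or by enqueueing it for the Overseer, depending on how the node is configured.
private void dispatchStateUpdate(DistributedClusterStateUpdater updater, CoreContainer coreContainer,
    DistributedClusterStateUpdater.MutatingCommand command, ZkNodeProps message) throws Exception {
  if (updater.isDistributedStateUpdate()) {
    updater.doSingleStateUpdate(command, message,
        coreContainer.getZkController().getSolrCloudManager(),
        coreContainer.getZkController().getZkStateReader());
  } else {
    coreContainer.getZkController().getOverseer().offerStateUpdate(Utils.toJSON(message));
  }
}
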
@@ -33,6 +33,7 @@ import java.util.concurrent.locks.ReentrantLock;

import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.DistributedClusterStateUpdater;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.ZkShardTerms;

@@ -48,6 +49,7 @@ import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.RoutingRule;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.CommonParams;

@@ -97,6 +99,7 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
// this is set to true in the constructor if the next processors in the chain
// are custom and may modify the SolrInputDocument racing with its serialization for replication
private final boolean cloneRequiredOnLeader;
private final DistributedClusterStateUpdater distributedClusterStateUpdater;

//used for keeping track of replicas that have processed an add/update from the leader
private RollupRequestReplicationTracker rollupReplicationTracker = null;

@@ -110,6 +113,7 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
CoreContainer cc = req.getCore().getCoreContainer();
cloudDesc = req.getCore().getCoreDescriptor().getCloudDescriptor();
zkController = cc.getZkController();
distributedClusterStateUpdater = zkController.getDistributedClusterStateUpdater();
cmdDistrib = new SolrCmdDistributor(cc.getUpdateShardHandler());
cloneRequiredOnLeader = isCloneRequiredOnLeader(next);
collection = cloudDesc.getCollectionName();

@@ -936,7 +940,13 @@ public class DistributedZkUpdateProcessor extends DistributedUpdateProcessor {
ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.SHARD_ID_PROP, myShardId,
"routeKey", routeKey + "!");
zkController.getOverseer().offerStateUpdate(Utils.toJSON(map));
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {
ZkNodeProps message = new ZkNodeProps(map);
distributedClusterStateUpdater.doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceRemoveRoutingRule, message,
zkController.getOverseer().getSolrCloudManager(), zkController.getOverseer().getZkStateReader());
} else {
zkController.getOverseer().offerStateUpdate(Utils.toJSON(map));
}
} catch (KeeperException e) {
log.warn("Exception while removing routing rule for route key: {}", routeKey, e);
} catch (Exception e) {

@@ -56,6 +56,7 @@ public class CreateCollectionCleanupTest extends SolrCloudTestCase {
" <int name=\"distribUpdateConnTimeout\">${distribUpdateConnTimeout:45000}</int>\n" +
" <int name=\"distribUpdateSoTimeout\">${distribUpdateSoTimeout:340000}</int>\n" +
" <int name=\"createCollectionWaitTimeTillActive\">${createCollectionWaitTimeTillActive:10}</int>\n" +
" <str name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str> \n" +
" </solrcloud>\n" +
" \n" +
"</solr>\n";

@@ -66,6 +67,7 @@ public class CreateCollectionCleanupTest extends SolrCloudTestCase {
configureCluster(1)
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
.useOtherClusterStateUpdateStrategy()
.configure();
}

@ -77,6 +77,7 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
|||
// these tests need to be isolated, so we dont share the minicluster
|
||||
configureCluster(4)
|
||||
.addConfig("conf", configset("cloud-minimal"))
|
||||
.useOtherClusterStateUpdateStrategy() // Some tests (this one) use "the other" cluster state update strategy to increase coverage
|
||||
.configure();
|
||||
}
|
||||
|
||||
|
@ -231,7 +232,8 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
|||
! r.equals(shard.getLeader())));
|
||||
|
||||
JettySolrRunner replicaJetty = cluster.getReplicaJetty(replica);
|
||||
ZkStateReaderAccessor accessor = new ZkStateReaderAccessor(replicaJetty.getCoreContainer().getZkController().getZkStateReader());
|
||||
ZkController replicaZkController = replicaJetty.getCoreContainer().getZkController();
|
||||
ZkStateReaderAccessor accessor = new ZkStateReaderAccessor(replicaZkController.getZkStateReader());
|
||||
|
||||
final long preDeleteWatcherCount = countUnloadCoreOnDeletedWatchers
|
||||
(accessor.getStateWatchers(collectionName));
|
||||
|
@ -243,7 +245,14 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
|||
ZkStateReader.COLLECTION_PROP, collectionName,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, replica.getName());
|
||||
|
||||
cluster.getOpenOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
|
||||
if (replicaZkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
cluster.getOpenOverseer().getDistributedClusterStateUpdater().doSingleStateUpdate(
|
||||
DistributedClusterStateUpdater.MutatingCommand.SliceRemoveReplica, m,
|
||||
cluster.getOpenOverseer().getSolrCloudManager(),
|
||||
cluster.getOpenOverseer().getZkStateReader());
|
||||
} else {
|
||||
cluster.getOpenOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
waitForState("Timeout waiting for replica get deleted", collectionName,
|
||||
(liveNodes, collectionState) -> collectionState.getSlice("shard1").getReplicas().size() == 2);
|
||||
|
@ -301,13 +310,22 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
|||
log.info("Running delete core {}",cd);
|
||||
|
||||
try {
|
||||
ZkController replica1ZkController = replica1Jetty.getCoreContainer().getZkController();
|
||||
ZkNodeProps m = new ZkNodeProps(
|
||||
Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
|
||||
ZkStateReader.CORE_NAME_PROP, replica1.getCoreName(),
|
||||
ZkStateReader.NODE_NAME_PROP, replica1.getNodeName(),
|
||||
ZkStateReader.COLLECTION_PROP, collectionName,
|
||||
ZkStateReader.CORE_NODE_NAME_PROP, replica1.getName());
|
||||
cluster.getOpenOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
|
||||
|
||||
if (replica1ZkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
cluster.getOpenOverseer().getDistributedClusterStateUpdater().doSingleStateUpdate(
|
||||
DistributedClusterStateUpdater.MutatingCommand.SliceRemoveReplica, m,
|
||||
cluster.getOpenOverseer().getSolrCloudManager(),
|
||||
cluster.getOpenOverseer().getZkStateReader());
|
||||
} else {
|
||||
cluster.getOpenOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
boolean replicaDeleted = false;
|
||||
TimeOut timeOut = new TimeOut(20, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
||||
|
@ -345,8 +363,14 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
|||
try {
|
||||
replica1Jetty.stop();
|
||||
waitForNodeLeave(replica1JettyNodeName);
|
||||
waitForState("Expected replica:"+replica1+" get down", collectionName, (liveNodes, collectionState)
|
||||
-> collectionState.getSlice("shard1").getReplica(replica1.getName()).getState() == DOWN);
|
||||
|
||||
// There is a race condition: the replica might be marked down before we get here, in which case we never get notified
|
||||
// So we check before waiting... Not eliminating but significantly reducing the race window - eliminating would require
|
||||
// deeper changes in the code where the watcher is set.
|
||||
if (getCollectionState(collectionName).getSlice("shard1").getReplica(replica1.getName()).getState() != DOWN) {
|
||||
waitForState("Expected replica:" + replica1 + " get down", collectionName, (liveNodes, collectionState)
|
||||
-> collectionState.getSlice("shard1").getReplica(replica1.getName()).getState() == DOWN);
|
||||
}
|
||||
replica1Jetty.start();
|
||||
waitingForReplicaGetDeleted.acquire();
|
||||
} finally {
|
||||
|
|
|
@ -94,13 +94,22 @@ public class DeleteShardTest extends SolrCloudTestCase {
|
|||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
// TODO can this be encapsulated better somewhere?
|
||||
DistributedQueue inQueue = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getOverseer().getStateUpdateQueue();
|
||||
Map<String, Object> propMap = new HashMap<>();
|
||||
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
|
||||
propMap.put(slice, state.toString());
|
||||
propMap.put(ZkStateReader.COLLECTION_PROP, collection);
|
||||
ZkNodeProps m = new ZkNodeProps(propMap);
|
||||
inQueue.offer(Utils.toJSON(m));
|
||||
|
||||
final Overseer overseer = cluster.getOpenOverseer();
|
||||
if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
|
||||
overseer.getDistributedClusterStateUpdater().doSingleStateUpdate(
|
||||
DistributedClusterStateUpdater.MutatingCommand.SliceUpdateShardState, m,
|
||||
cluster.getOpenOverseer().getSolrCloudManager(),
|
||||
cluster.getOpenOverseer().getZkStateReader());
|
||||
} else {
|
||||
DistributedQueue inQueue = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getOverseer().getStateUpdateQueue();
|
||||
inQueue.offer(Utils.toJSON(m));
|
||||
}
|
||||
|
||||
waitForState("Expected shard " + slice + " to be in state " + state.toString(), collection, (n, c) -> {
|
||||
return c.getSlice(slice).getState() == state;
|
||||
|
|
|
@ -26,8 +26,17 @@ public class MockSolrSource {
|
|||
|
||||
public static ZkController makeSimpleMock(Overseer overseer, ZkStateReader reader, SolrZkClient zkClient) {
|
||||
ZkController zkControllerMock = mock(ZkController.class);
|
||||
if (overseer == null) overseer = mock(Overseer.class);
|
||||
|
||||
final DistributedClusterStateUpdater distributedClusterStateUpdater;
|
||||
if (overseer == null) {
|
||||
// When no overseer is passed, the Overseer queue does nothing. Replicate this in how we handle distributed state
|
||||
// updates by doing nothing as well...
|
||||
distributedClusterStateUpdater = mock(DistributedClusterStateUpdater.class);
|
||||
overseer = mock(Overseer.class);
|
||||
when(overseer.getDistributedClusterStateUpdater()).thenReturn(distributedClusterStateUpdater);
|
||||
} else {
|
||||
// Use the same configuration for state updates as the Overseer.
|
||||
distributedClusterStateUpdater = overseer.getDistributedClusterStateUpdater();
|
||||
}
|
||||
|
||||
if (reader != null && zkClient == null) {
|
||||
zkClient = reader.getZkClient();
|
||||
|
@ -38,11 +47,11 @@ public class MockSolrSource {
|
|||
when(reader.getZkClient()).thenReturn(zkClient);
|
||||
}
|
||||
|
||||
|
||||
when(zkControllerMock.getOverseer()).thenReturn(overseer);
|
||||
when(zkControllerMock.getZkStateReader()).thenReturn(reader);
|
||||
when(zkControllerMock.getZkClient()).thenReturn(zkClient);
|
||||
when(zkControllerMock.getOverseer()).thenReturn(overseer);
|
||||
when(zkControllerMock.getDistributedClusterStateUpdater()).thenReturn(distributedClusterStateUpdater);
|
||||
return zkControllerMock;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -107,6 +107,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
private static OverseerTaskQueue workQueueMock;
|
||||
private static OverseerTaskQueue stateUpdateQueueMock;
|
||||
private static Overseer overseerMock;
|
||||
private static DistributedClusterStateUpdater distributedClusterStateUpdater;
|
||||
private static DistributedClusterStateUpdater.StateChangeRecorder stateChangeRecorder;
|
||||
private static ZkController zkControllerMock;
|
||||
private static SolrCloudManager cloudDataProviderMock;
|
||||
private static ClusterStateProvider clusterStateProviderMock;
|
||||
|
@ -152,7 +154,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
DistributedMap completedMap,
|
||||
DistributedMap failureMap,
|
||||
SolrMetricsContext solrMetricsContext) {
|
||||
super(zkStateReader, myId, shardHandlerFactory, adminPath, new Stats(), overseer, new OverseerNodePrioritizer(zkStateReader, overseer.getStateUpdateQueue(), adminPath, shardHandlerFactory), workQueue, runningMap, completedMap, failureMap, solrMetricsContext);
|
||||
super(zkStateReader, myId, shardHandlerFactory, adminPath, new Stats(), overseer, new OverseerNodePrioritizer(zkStateReader, overseer, adminPath, shardHandlerFactory), workQueue, runningMap, completedMap, failureMap, solrMetricsContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -177,6 +179,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
clusterStateMock = mock(ClusterState.class);
|
||||
solrZkClientMock = mock(SolrZkClient.class);
|
||||
overseerMock = mock(Overseer.class);
|
||||
distributedClusterStateUpdater = mock(DistributedClusterStateUpdater.class);
|
||||
stateChangeRecorder = mock(DistributedClusterStateUpdater.StateChangeRecorder.class);
|
||||
zkControllerMock = mock(ZkController.class);
|
||||
cloudDataProviderMock = mock(SolrCloudManager.class);
|
||||
objectCache = new ObjectCache();
|
||||
|
@ -204,6 +208,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
clusterStateMock = null;
|
||||
solrZkClientMock = null;
|
||||
overseerMock = null;
|
||||
distributedClusterStateUpdater = null;
|
||||
stateChangeRecorder = null;
|
||||
zkControllerMock = null;
|
||||
cloudDataProviderMock = null;
|
||||
clusterStateProviderMock = null;
|
||||
|
@ -232,6 +238,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
reset(clusterStateMock);
|
||||
reset(solrZkClientMock);
|
||||
reset(overseerMock);
|
||||
reset(distributedClusterStateUpdater);
|
||||
reset(stateChangeRecorder);
|
||||
reset(zkControllerMock);
|
||||
reset(cloudDataProviderMock);
|
||||
objectCache.clear();
|
||||
|
@ -259,7 +267,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Set<String> commonMocks(int liveNodesCount) throws Exception {
|
||||
protected Set<String> commonMocks(int liveNodesCount, boolean distributedClusterStateUpdates) throws Exception {
|
||||
when(shardHandlerFactoryMock.getShardHandler()).thenReturn(shardHandlerMock);
|
||||
when(workQueueMock.peekTopN(anyInt(), any(), anyLong())).thenAnswer(invocation -> {
|
||||
Object result;
|
||||
|
@ -379,6 +387,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
when(overseerMock.getZkController()).thenReturn(zkControllerMock);
|
||||
when(overseerMock.getSolrCloudManager()).thenReturn(cloudDataProviderMock);
|
||||
when(overseerMock.getCoreContainer()).thenReturn(coreContainerMock);
|
||||
when(overseerMock.getDistributedClusterStateUpdater()).thenReturn(distributedClusterStateUpdater);
|
||||
when(distributedClusterStateUpdater.createStateChangeRecorder(any(), anyBoolean())).thenReturn(stateChangeRecorder);
|
||||
when(coreContainerMock.getUpdateShardHandler()).thenReturn(updateShardHandlerMock);
|
||||
when(coreContainerMock.getPlacementPluginFactory()).thenReturn(placementPluginFactoryMock);
|
||||
when(updateShardHandlerMock.getDefaultHttpClient()).thenReturn(httpClientMock);
|
||||
|
@ -450,21 +460,45 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
|
||||
when(overseerMock.getStateUpdateQueue(any())).thenReturn(stateUpdateQueueMock);
|
||||
when(overseerMock.getStateUpdateQueue()).thenReturn(stateUpdateQueueMock);
|
||||
|
||||
Mockito.doAnswer(
|
||||
new Answer<Void>() {
|
||||
public Void answer(InvocationOnMock invocation) {
|
||||
try {
|
||||
handleCreateCollMessage(invocation.getArgument(0));
|
||||
stateUpdateQueueMock.offer(invocation.getArgument(0));
|
||||
} catch (KeeperException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
|
||||
// Selecting the cluster state update strategy: Overseer when distributedClusterStateUpdates is false, otherwise distributed updates.
|
||||
when(distributedClusterStateUpdater.isDistributedStateUpdate()).thenReturn(distributedClusterStateUpdates);
|
||||
|
||||
if (distributedClusterStateUpdates) {
|
||||
// Mocking for state change via distributed updates. There are two types of updates done in CreateCollectionCmd:
|
||||
// 1. Single line recording and executing a command
|
||||
Mockito.doAnswer(
|
||||
new Answer<Void>() {
|
||||
public Void answer(InvocationOnMock invocation) {
|
||||
handleCreateCollMessageProps(invocation.getArgument(1));
|
||||
return null;
|
||||
}}).when(distributedClusterStateUpdater).doSingleStateUpdate(any(), any(), any(), any());
|
||||
|
||||
// 2. Recording a command to be executed as part of a batch of commands
|
||||
Mockito.doAnswer(
|
||||
new Answer<Void>() {
|
||||
public Void answer(InvocationOnMock invocation) {
|
||||
handleCreateCollMessageProps(invocation.getArgument(1));
|
||||
return null;
|
||||
}}).when(stateChangeRecorder).record(any(), any());
|
||||
} else {
|
||||
// Mocking for state change via the Overseer queue
|
||||
Mockito.doAnswer(
|
||||
new Answer<Void>() {
|
||||
public Void answer(InvocationOnMock invocation) {
|
||||
try {
|
||||
handleCreateCollMessage(invocation.getArgument(0));
|
||||
stateUpdateQueueMock.offer(invocation.getArgument(0));
|
||||
} catch (KeeperException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
}}).when(overseerMock).offerStateUpdate(any());
|
||||
|
||||
}).when(overseerMock).offerStateUpdate(any());
|
||||
}
|
||||
|
||||
when(zkControllerMock.getZkClient()).thenReturn(solrZkClientMock);
|
||||
|
||||
when(cloudManagerMock.getDistribStateManager()).thenReturn(distribStateManagerMock);
|
||||
|
@ -520,9 +554,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
private void handleCreateCollMessage(byte[] bytes) {
|
||||
handleCreateCollMessageProps(ZkNodeProps.load(bytes));
|
||||
}
|
||||
|
||||
private void handleCreateCollMessageProps(ZkNodeProps props) {
|
||||
log.info("track created replicas / collections");
|
||||
try {
|
||||
ZkNodeProps props = ZkNodeProps.load(bytes);
|
||||
if (CollectionParams.CollectionAction.CREATE.isEqual(props.getStr("operation"))) {
|
||||
String collName = props.getStr("name");
|
||||
if (collName != null) collectionsSet.put(collName, new ClusterState.CollectionRef(
|
||||
|
@ -732,12 +769,11 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
SEND_NULL
|
||||
}
|
||||
protected void testTemplate(Integer numberOfNodes, Integer numberOfNodesToCreateOn, CreateNodeListOptions createNodeListOption, Integer replicationFactor,
|
||||
Integer numberOfSlices,
|
||||
boolean collectionExceptedToBeCreated) throws Exception {
|
||||
Integer numberOfSlices, boolean collectionExceptedToBeCreated, boolean distributedClusterStateUpdates) throws Exception {
|
||||
assertTrue("Wrong usage of testTemplate. numberOfNodesToCreateOn " + numberOfNodesToCreateOn + " is not allowed to be higher than numberOfNodes " + numberOfNodes, numberOfNodes.intValue() >= numberOfNodesToCreateOn.intValue());
|
||||
assertTrue("Wrong usage of testTemplage. createNodeListOption has to be " + CreateNodeListOptions.SEND + " when numberOfNodes and numberOfNodesToCreateOn are unequal", ((createNodeListOption == CreateNodeListOptions.SEND) || (numberOfNodes.intValue() == numberOfNodesToCreateOn.intValue())));
|
||||
|
||||
Set<String> liveNodes = commonMocks(numberOfNodes);
|
||||
Set<String> liveNodes = commonMocks(numberOfNodes, distributedClusterStateUpdates);
|
||||
List<String> createNodeList = new ArrayList<>();
|
||||
int i = 0;
|
||||
for (String node : liveNodes) {
|
||||
|
@ -774,140 +810,250 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
|
|||
createNodeList, dontShuffleCreateNodeSet);
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNode() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
|
||||
// Tests below are being run twice: once with Overseer based updates and once with distributed updates.
|
||||
// This is done explicitly here because these tests use mocks than can be configured directly.
|
||||
// Tests not using mocks (most other tests) but using the MiniSolrCloudCluster are randomized to sometimes use Overseer
|
||||
// and sometimes distributed state updates (but not both for a given test and a given test seed).
|
||||
// See the SolrCloudTestCase.Builder constructor and the rest of the Builder class.
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNode() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodes() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodes() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodes() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND_NULL;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodes() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND_NULL;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationUnequalNumberOfSlicesPerNode() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 6;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationUnequalNumberOfSlicesPerNode() throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 3;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationLimitedNodesToCreateOn()
|
||||
throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 2;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 6;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationLimitedNodesToCreateOn()
|
||||
throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 2;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 3;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true);
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeOverseer() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimits()
|
||||
throws Exception {
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeDistributedUpdates() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeInternal(true);
|
||||
}
|
||||
|
||||
private void testNoReplicationEqualNumberOfSlicesPerNodeInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeOverseer() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeInternal(false);
|
||||
}
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeDistributedUpdates() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeInternal(true);
|
||||
}
|
||||
|
||||
private void testReplicationEqualNumberOfSlicesPerNodeInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesOverseer() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesDistributedUpdates() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(true);
|
||||
}
|
||||
|
||||
private void testNoReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesOverseer() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesDistributedUpdates() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(true);
|
||||
}
|
||||
|
||||
private void testReplicationEqualNumberOfSlicesPerNodeSendCreateNodesEqualToLiveNodesInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesOverseer() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesDistributedUpdates() throws Exception {
|
||||
testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(true);
|
||||
}
|
||||
|
||||
private void testNoReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND_NULL;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 8;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesOverseer() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesDistributedUpdates() throws Exception {
|
||||
testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(true);
|
||||
}
|
||||
|
||||
private void testReplicationEqualNumberOfSlicesPerNodeSendNullCreateNodesInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND_NULL;
|
||||
Integer replicationFactor = 2;
|
||||
Integer numberOfSlices = 4;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationUnequalNumberOfSlicesPerNodeOverseer() throws Exception {
|
||||
testNoReplicationUnequalNumberOfSlicesPerNodeInternal(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReplicationUnequalNumberOfSlicesPerNodeDistributedUpdates() throws Exception {
|
||||
testNoReplicationUnequalNumberOfSlicesPerNodeInternal(true);
|
||||
}
|
||||
|
||||
private void testNoReplicationUnequalNumberOfSlicesPerNodeInternal(boolean distributedClusterStateUpdates) throws Exception {
|
||||
Integer numberOfNodes = 4;
|
||||
Integer numberOfNodesToCreateOn = 4;
|
||||
CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
|
||||
Integer replicationFactor = 1;
|
||||
Integer numberOfSlices = 6;
|
||||
testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
|
||||
true, distributedClusterStateUpdates);
|
||||
}
|
||||
|
||||
@Test
|
||||
  public void testReplicationUnequalNumberOfSlicesPerNodeOverseer() throws Exception {
    testReplicationUnequalNumberOfSlicesPerNodeInternal(false);
  }

  @Test
  public void testReplicationUnequalNumberOfSlicesPerNodeDistributedUpdates() throws Exception {
    testReplicationUnequalNumberOfSlicesPerNodeInternal(true);
  }

  private void testReplicationUnequalNumberOfSlicesPerNodeInternal(boolean distributedClusterStateUpdates) throws Exception {
    Integer numberOfNodes = 4;
    Integer numberOfNodesToCreateOn = 4;
    CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.DONT_SEND;
    Integer replicationFactor = 2;
    Integer numberOfSlices = 3;
    testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
        true, distributedClusterStateUpdates);
  }

  @Test
  public void testNoReplicationLimitedNodesToCreateOnOverseer() throws Exception {
    testNoReplicationLimitedNodesToCreateOnInternal(false);
  }

  @Test
  public void testNoReplicationLimitedNodesToCreateOnDistributedUpdates() throws Exception {
    testNoReplicationLimitedNodesToCreateOnInternal(true);
  }

  private void testNoReplicationLimitedNodesToCreateOnInternal(boolean distributedClusterStateUpdates) throws Exception {
    Integer numberOfNodes = 4;
    Integer numberOfNodesToCreateOn = 2;
    CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
    Integer replicationFactor = 1;
    Integer numberOfSlices = 6;
    testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
        true, distributedClusterStateUpdates);
  }

  @Test
  public void testReplicationLimitedNodesToCreateOnOverseer() throws Exception {
    testReplicationLimitedNodesToCreateOnInternal(false);
  }

  @Test
  public void testReplicationLimitedNodesToCreateOnDistributedUpdates() throws Exception {
    testReplicationLimitedNodesToCreateOnInternal(true);
  }

  private void testReplicationLimitedNodesToCreateOnInternal(boolean distributedClusterStateUpdates) throws Exception {
    Integer numberOfNodes = 4;
    Integer numberOfNodesToCreateOn = 2;
    CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
    Integer replicationFactor = 2;
    Integer numberOfSlices = 3;
    testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
        true, distributedClusterStateUpdates);
  }

  @Test
  public void testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsOverseer() throws Exception {
    testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(false);
  }

  @Test
  public void testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsDistributedUpdates() throws Exception {
    testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(true);
  }

  private void testNoReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(boolean distributedClusterStateUpdates) throws Exception {
    Integer numberOfNodes = 4;
    Integer numberOfNodesToCreateOn = 3;
    CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
    Integer replicationFactor = 1;
    Integer numberOfSlices = 8;
    testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
        false);
        false, distributedClusterStateUpdates);
  }

  @Test
  public void testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimits()
      throws Exception {
  public void testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsOverseer() throws Exception {
    testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(false);
  }

  @Test
  public void testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsDistributedUpdates() throws Exception {
    testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(true);
  }

  private void testReplicationCollectionNotCreatedDueToMaxShardsPerNodeAndNodesToCreateOnLimitsInternal(boolean distributedClusterStateUpdates) throws Exception {
    Integer numberOfNodes = 4;
    Integer numberOfNodesToCreateOn = 3;
    CreateNodeListOptions createNodeListOptions = CreateNodeListOptions.SEND;
    Integer replicationFactor = 2;
    Integer numberOfSlices = 4;
    testTemplate(numberOfNodes, numberOfNodesToCreateOn, createNodeListOptions, replicationFactor, numberOfSlices,
        false);
        false, distributedClusterStateUpdates);
  }

}

@@ -26,10 +26,7 @@ import java.util.function.Predicate;

import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.junit.After;

@@ -157,9 +154,7 @@ public class OverseerRolesTest extends SolrCloudTestCase {
    String leaderId = OverseerCollectionConfigSetProcessor.getLeaderId(zkClient());
    String leader = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
    log.info("### Sending QUIT to overseer {}", leader);
    getOverseerJetty().getCoreContainer().getZkController().getOverseer().getStateUpdateQueue()
        .offer(Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
            "id", leaderId)));
    getOverseerJetty().getCoreContainer().getZkController().getOverseer().sendQuitToOverseer(leaderId);

    waitForNewOverseer(15, s -> Objects.equals(leader, s) == false, false);

@@ -49,8 +49,11 @@ public class OverseerStatusTest extends SolrCloudTestCase {
    SimpleOrderedMap<Object> createcollection
        = (SimpleOrderedMap<Object>) collection_operations.get(CollectionParams.CollectionAction.CREATE.toLower());
    assertEquals("No stats for create in OverseerCollectionProcessor", numCollectionCreates + 1, createcollection.get("requests"));
    createcollection = (SimpleOrderedMap<Object>) overseer_operations.get(CollectionParams.CollectionAction.CREATE.toLower());
    assertEquals("No stats for create in Overseer", numOverseerCreates + 1, createcollection.get("requests"));
    // When cluster state updates are distributed, Overseer doesn't see the updates and doesn't report stats on them.
    if (!cluster.getOpenOverseer().getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      createcollection = (SimpleOrderedMap<Object>) overseer_operations.get(CollectionParams.CollectionAction.CREATE.toLower());
      assertEquals("No stats for create in Overseer", numOverseerCreates + 1, createcollection.get("requests"));
    }

    // Reload the collection
    CollectionAdminRequest.reloadCollection(collectionName).process(cluster.getSolrClient());

@@ -81,19 +84,21 @@ public class OverseerStatusTest extends SolrCloudTestCase {
    assertNotNull(amIleader.get("errors"));
    assertNotNull(amIleader.get("avgTimePerRequest"));

    amIleader = (SimpleOrderedMap<Object>) overseer_operations.get("am_i_leader");
    assertNotNull("Overseer amILeader stats should not be null", amIleader);
    assertNotNull(amIleader.get("requests"));
    assertTrue(Integer.parseInt(amIleader.get("requests").toString()) > 0);
    assertNotNull(amIleader.get("errors"));
    assertNotNull(amIleader.get("avgTimePerRequest"));

    SimpleOrderedMap<Object> updateState = (SimpleOrderedMap<Object>) overseer_operations.get("update_state");
    assertNotNull("Overseer update_state stats should not be null", updateState);
    assertNotNull(updateState.get("requests"));
    assertTrue(Integer.parseInt(updateState.get("requests").toString()) > 0);
    assertNotNull(updateState.get("errors"));
    assertNotNull(updateState.get("avgTimePerRequest"));
    // When cluster state updates are distributed, Overseer doesn't see the updates and doesn't report stats on them.
    if (!cluster.getOpenOverseer().getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      amIleader = (SimpleOrderedMap<Object>) overseer_operations.get("am_i_leader");
      assertNotNull("Overseer amILeader stats should not be null", amIleader);
      assertNotNull(amIleader.get("requests"));
      assertTrue(Integer.parseInt(amIleader.get("requests").toString()) > 0);
      assertNotNull(amIleader.get("errors"));
      assertNotNull(amIleader.get("avgTimePerRequest"));

      SimpleOrderedMap<Object> updateState = (SimpleOrderedMap<Object>) overseer_operations.get("update_state");
      assertNotNull("Overseer update_state stats should not be null", updateState);
      assertNotNull(updateState.get("requests"));
      assertTrue(Integer.parseInt(updateState.get("requests").toString()) > 0);
      assertNotNull(updateState.get("errors"));
      assertNotNull(updateState.get("avgTimePerRequest"));
    }
  }
}

@@ -188,7 +188,10 @@ public class OverseerTest extends SolrTestCaseJ4 {
        ZkStateReader.REPLICATION_FACTOR, "1",
        ZkStateReader.NUM_SHARDS_PROP, Integer.toString(numShards),
        "createNodeSet", "");
    ZkDistributedQueue q = MiniSolrCloudCluster.getOpenOverseer(overseers).getStateUpdateQueue();
    final Overseer overseer = MiniSolrCloudCluster.getOpenOverseer(overseers);
    // This being an Overseer test, we force it to use the Overseer based cluster state update. Look for "new Overseer" calls in this class.
    assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
    ZkDistributedQueue q = overseer.getStateUpdateQueue();
    q.offer(Utils.toJSON(m));
  }

@@ -204,6 +207,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
        ZkStateReader.CORE_NAME_PROP, coreName,
        ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName,
        ZkStateReader.COLLECTION_PROP, collection);
    assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
    ZkDistributedQueue q = overseer.getStateUpdateQueue();
    q.offer(Utils.toJSON(m));
    return null;

@@ -387,7 +391,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
        ZkStateReader.REPLICATION_FACTOR, "1",
        ZkStateReader.NUM_SHARDS_PROP, Integer.toString(numShards),
        "createNodeSet", "");
    ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
    ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();
    q.offer(Utils.toJSON(m));
  }

@@ -530,7 +534,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
        ZkStateReader.NODE_NAME_PROP, "127.0.0.1:8983_solr");
    List<ZkWriteCommand> commands = new NodeMutator().downNode(reader.getClusterState(), m);

    ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
    ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();

    q.offer(Utils.toJSON(m));

@@ -586,7 +590,7 @@ public class OverseerTest extends SolrTestCaseJ4 {

    overseerClient = electNewOverseer(server.getZkAddress());

    ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
    ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();

    createCollection(COLLECTION, 1);

@@ -640,7 +644,15 @@ public class OverseerTest extends SolrTestCaseJ4 {
  }

  private Overseer getOpenOverseer() {
    return MiniSolrCloudCluster.getOpenOverseer(overseers);
    Overseer overseer = MiniSolrCloudCluster.getOpenOverseer(overseers);
    assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
    return overseer;
  }

  private Overseer getOverseerZero() {
    Overseer overseer = overseers.get(0);
    assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
    return overseer;
  }

  @Test

@@ -739,7 +751,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
    httpShardHandlerFactory.init(new PluginInfo("shardHandlerFactory", Collections.emptyMap()));
    httpShardHandlerFactorys.add(httpShardHandlerFactory);
    Overseer overseer = new Overseer((HttpShardHandler) httpShardHandlerFactory.getShardHandler(), updateShardHandler, "/admin/cores", reader, zkController,
        new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").build());
        new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").setUseDistributedClusterStateUpdates(false).build());
    overseers.add(overseer);
    ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
        server.getZkAddress().replaceAll("/", "_"));

@@ -897,7 +909,7 @@ public class OverseerTest extends SolrTestCaseJ4 {

    electNewOverseer(server.getZkAddress());

    // Create collection znode before repeatedly trying to enqueue the Cluster state change message
    // Create collection znode before repeatedly trying to enqueue the cluster state update message
    zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + COLLECTION, true);

    for (int i = 0; i < atLeast(4); i++) {

@@ -914,7 +926,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
            ZkStateReader.REPLICATION_FACTOR, "1",
            ZkStateReader.NUM_SHARDS_PROP, "1",
            "createNodeSet", "");
        ZkDistributedQueue q = MiniSolrCloudCluster.getOpenOverseer(overseers).getStateUpdateQueue();
        ZkDistributedQueue q = getOpenOverseer().getStateUpdateQueue();
        q.offer(Utils.toJSON(m));
        break;
      } catch (SolrException | KeeperException | AlreadyClosedException e) {

@@ -1104,7 +1116,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
          ZkStateReader.NUM_SHARDS_PROP, "1",
          ZkStateReader.REPLICATION_FACTOR, "1"
      );
      ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
      ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();
      q.offer(Utils.toJSON(m));
    }

@@ -1117,7 +1129,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
          ZkStateReader.CORE_NODE_NAME_PROP, "node1",
          ZkStateReader.COLLECTION_PROP, "perf" + j,
          ZkStateReader.NUM_SHARDS_PROP, "1");
      ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
      ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();
      q.offer(Utils.toJSON(m));
      if (j >= MAX_COLLECTIONS - 1) j = 0;
      if (k >= MAX_CORES - 1) k = 0;

@@ -1216,7 +1228,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
        ZkStateReader.ROLES_PROP, "",
        ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
    queue.offer(Utils.toJSON(m));
    m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state",
    m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
        ZkStateReader.NODE_NAME_PROP, "node1:8983_",
        ZkStateReader.SHARD_ID_PROP, "shard1",
        ZkStateReader.COLLECTION_PROP, COLLECTION,

@@ -1228,7 +1240,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
    overseerClient = electNewOverseer(server.getZkAddress());

    //submit to proper queue
    queue = overseers.get(0).getStateUpdateQueue();
    queue = getOverseerZero().getStateUpdateQueue();
    m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
        ZkStateReader.NODE_NAME_PROP, "127.0.0.1:8983_solr",
        ZkStateReader.SHARD_ID_PROP, "shard1",

@@ -1265,7 +1277,7 @@ public class OverseerTest extends SolrTestCaseJ4 {

    overseerClient = electNewOverseer(server.getZkAddress());

    ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
    ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();

    createCollection("c1", 1);

@@ -1383,8 +1395,9 @@ public class OverseerTest extends SolrTestCaseJ4 {

    ZkController zkController = createMockZkController(address, null, reader);
    zkControllers.add(zkController);
    // Create an Overseer with associated configuration to NOT USE distributed state update. Tests in this class really test the Overseer.
    Overseer overseer = new Overseer((HttpShardHandler) httpShardHandlerFactory.getShardHandler(), updateShardHandler, "/admin/cores", reader, zkController,
        new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").build());
        new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").setUseDistributedClusterStateUpdates(false).build());
    overseers.add(overseer);
    ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
        address.replaceAll("/", "_"));

@@ -1464,7 +1477,7 @@ public class OverseerTest extends SolrTestCaseJ4 {

    overseerClient = electNewOverseer(server.getZkAddress());

    ZkDistributedQueue q = overseers.get(0).getStateUpdateQueue();
    ZkDistributedQueue q = getOverseerZero().getStateUpdateQueue();

    // create collection
    {

@@ -181,8 +181,15 @@ public class TestRandomRequestDistribution extends AbstractFullDistribZkTestBase
    if (log.isInfoEnabled()) {
      log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
    }
    ZkDistributedQueue q = jettys.get(0).getCoreContainer().getZkController().getOverseer().getStateUpdateQueue();
    q.offer(Utils.toJSON(m));

    final Overseer overseer = jettys.get(0).getCoreContainer().getZkController().getOverseer();
    if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      overseer.getDistributedClusterStateUpdater().doSingleStateUpdate(
          DistributedClusterStateUpdater.MutatingCommand.ReplicaSetState, m, overseer.getSolrCloudManager(), overseer.getZkStateReader());
    } else {
      ZkDistributedQueue q = overseer.getStateUpdateQueue();
      q.offer(Utils.toJSON(m));
    }

    verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);

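For reference, the two submission paths exercised in the hunk above can be summarized in a single helper. This is an illustrative sketch only; the helper name and its `throws` clause are assumptions, while the calls it makes are the ones visible in the diff:

  // Sketch (not part of the commit): submit a cluster state update either directly
  // (distributed mode) or through the Overseer queue, mirroring the branch added above.
  private void submitStateUpdate(Overseer overseer, ZkNodeProps message,
      DistributedClusterStateUpdater.MutatingCommand command) throws Exception {
    if (overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      // Distributed mode: the node applies the change to state.json itself.
      overseer.getDistributedClusterStateUpdater().doSingleStateUpdate(
          command, message, overseer.getSolrCloudManager(), overseer.getZkStateReader());
    } else {
      // Overseer mode: enqueue the message for the elected Overseer to process.
      overseer.getStateUpdateQueue().offer(Utils.toJSON(message));
    }
  }
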
@@ -121,7 +121,15 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
    waitForState("Expected 2x1 for collection: " + collection, collection,
        clusterShape(2, 2));
    CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
    assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));

    // When cluster state updates are done in a distributed way, the stats that this test is verifying are not available.
    // See comment in OverseerStatusCmd.call().
    // Keeping the rest of the test running in case other errors can happen and can be caught...
    // Eventually maintain per node cluster state updates stats and be able to check them here? Longer term question...

    if (!cluster.getOpenOverseer().getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));
    }
    CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
  }

@@ -186,15 +194,30 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
    waitForState("Expected 2x2 for collection: " + collection, collection,
        clusterShape(2, 4));
    CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
    // 2 for recovering state, 4 for active state
    assertEquals(getNumStateOpeations(resp) + 6, getNumStateOpeations(resp2));

    // See comment in testSkipLeaderOperations() above why this assert is skipped
    if (!cluster.getOpenOverseer().getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
      // 2 for recovering state, 4 for active state
      assertEquals(getNumStateOpeations(resp) + 6, getNumStateOpeations(resp2));
    }
    CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
  }

  /**
   * Returns the value corresponding to stat: "overseer_operations", "leader", "requests"
   * This stat (see {@link org.apache.solr.cloud.api.collections.OverseerStatusCmd} is updated when the cluster state
   * updater processes a message of type {@link org.apache.solr.cloud.overseer.OverseerAction#LEADER} to set a shard leader<p>
   *
   * The update happens in org.apache.solr.cloud.Overseer.ClusterStateUpdater.processQueueItem()
   */
  private int getNumLeaderOpeations(CollectionAdminResponse resp) {
    return (int) resp.getResponse().findRecursive("overseer_operations", "leader", "requests");
  }

  /**
   * "state" stats are when Overseer processes a {@link org.apache.solr.cloud.overseer.OverseerAction#STATE} message
   * that sets replica properties
   */
  private int getNumStateOpeations(CollectionAdminResponse resp) {
    return (int) resp.getResponse().findRecursive("overseer_operations", "state", "requests");
  }

@@ -288,7 +288,12 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
      ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
          CollectionParams.CollectionAction.CREATE.toLower(), ZkStateReader.NODE_NAME_PROP, nodeName, ZkStateReader.NUM_SHARDS_PROP, "1",
          "name", collectionName);
      zkController.getOverseerJobQueue().offer(Utils.toJSON(m));
      if (zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
        zkController.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.ClusterCreateCollection, m,
            zkController.getSolrCloudManager(), zkController.getZkStateReader());
      } else {
        zkController.getOverseerJobQueue().offer(Utils.toJSON(m));
      }

      HashMap<String, Object> propMap = new HashMap<>();
      propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());

@@ -297,7 +302,12 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
      propMap.put(ZkStateReader.NODE_NAME_PROP, "non_existent_host1");
      propMap.put(ZkStateReader.CORE_NAME_PROP, collectionName);
      propMap.put(ZkStateReader.STATE_PROP, "active");
      zkController.getOverseerJobQueue().offer(Utils.toJSON(propMap));
      if (zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
        zkController.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, new ZkNodeProps(propMap),
            zkController.getSolrCloudManager(), zkController.getZkStateReader());
      } else {
        zkController.getOverseerJobQueue().offer(Utils.toJSON(propMap));
      }

      propMap = new HashMap<>();
      propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());

@@ -306,7 +316,12 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
      propMap.put(ZkStateReader.NODE_NAME_PROP, "non_existent_host2");
      propMap.put(ZkStateReader.CORE_NAME_PROP, collectionName);
      propMap.put(ZkStateReader.STATE_PROP, "down");
      zkController.getOverseerJobQueue().offer(Utils.toJSON(propMap));
      if (zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate()) {
        zkController.getDistributedClusterStateUpdater().doSingleStateUpdate(DistributedClusterStateUpdater.MutatingCommand.SliceAddReplica, new ZkNodeProps(propMap),
            zkController.getSolrCloudManager(), zkController.getZkStateReader());
      } else {
        zkController.getOverseerJobQueue().offer(Utils.toJSON(propMap));
      }

      zkController.getZkStateReader().forciblyRefreshAllClusterStateSlow();

@@ -45,6 +45,7 @@
    <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:60000}</int>
    <str name="zkCredentialsProvider">${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider}</str>
    <str name="zkACLProvider">${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider}</str>
    <bool name="distributedClusterStateUpdates">${distributedClusterStateUpdates:false}</bool>

  </solrcloud>

@@ -42,6 +42,7 @@ You can find `solr.xml` in your `$SOLR_HOME` directory (usually `server/solr` or
    <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:60000}</int>
    <str name="zkCredentialsProvider">${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider}</str>
    <str name="zkACLProvider">${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider}</str>
    <bool name="distributedClusterStateUpdates">${distributedClusterStateUpdates:false}</bool>
  </solrcloud>

  <shardHandlerFactory name="shardHandlerFactory"

@@ -163,6 +164,9 @@ If `TRUE`, node names are not based on the address of the node, but on a generic
Optional parameters that can be specified if you are using <<zookeeper-access-control.adoc#,ZooKeeper Access Control>>.

`distributedClusterStateUpdates`::
If `TRUE`, the internal behavior of SolrCloud is changed to not use the Overseer for collections' `state.json` updates but do this directly against ZooKeeper.

=== The <logging> Element

`class`::

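As with the other `<solrcloud>` options in the example at the top of this page, the value is typically wired to a system property so it can be toggled at startup without editing `solr.xml` (the placeholder below simply repeats the snippet shown earlier in this section):

  <bool name="distributedClusterStateUpdates">${distributedClusterStateUpdates:false}</bool>

With that placeholder present, starting Solr with `-DdistributedClusterStateUpdates=true` should select the distributed code path; leaving the property unset keeps the default of `false`, i.e., Overseer-based updates.
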
@@ -42,7 +42,6 @@ public class PerReplicaStatesOps {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  private PerReplicaStates rs;
  List<PerReplicaStates.Operation> ops;
  private boolean preOp = true;
  final Function<PerReplicaStates, List<PerReplicaStates.Operation>> fun;

  PerReplicaStatesOps(Function<PerReplicaStates, List<PerReplicaStates.Operation>> fun) {

@@ -255,7 +254,6 @@ public class PerReplicaStatesOps {
      }
      return operations;
    });
    result.preOp = false;
    result.ops = result.refresh(null);
    return result;
  }

@@ -277,13 +275,6 @@ public class PerReplicaStatesOps {
    return ops;
  }

  /**
   * To be executed before collection state.json is persisted
   */
  public boolean isPreOp() {
    return preOp;
  }

  /**
   * This method should compute the set of ZK operations for a given action
   * for instance, a state change may result in 2 operations on per-replica states (1 CREATE and 1 DELETE)

@@ -410,7 +410,7 @@ public class ZkStateReader implements SolrCloseable {
      if (log.isDebugEnabled()) {
        log.debug("Server older than client {}<{}", collection.getZNodeVersion(), version);
      }
      DocCollection nu = getCollectionLive(this, coll);
      DocCollection nu = getCollectionLive(coll);
      if (nu == null) return -1;
      if (nu.getZNodeVersion() > collection.getZNodeVersion()) {
        if (updateWatchedCollection(coll, nu)) {

@@ -680,7 +680,7 @@ public class ZkStateReader implements SolrCloseable {
        }
      }
      if (shouldFetch) {
        cachedDocCollection = getCollectionLive(ZkStateReader.this, collName);
        cachedDocCollection = getCollectionLive(collName);
        lastUpdateTime = System.nanoTime();
      }
    }

@@ -1193,7 +1193,7 @@ public class ZkStateReader implements SolrCloseable {
  }

  /**
   * Watches a single collection's format2 state.json.
   * Watches a single collection's state.json.
   */
  class StateWatcher implements Watcher {
    private final String coll;

@@ -1446,9 +1446,9 @@ public class ZkStateReader implements SolrCloseable {
    }
  }

  public static DocCollection getCollectionLive(ZkStateReader zkStateReader, String coll) {
  public DocCollection getCollectionLive(String coll) {
    try {
      return zkStateReader.fetchCollectionState(coll, null);
      return fetchCollectionState(coll, null);
    } catch (KeeperException e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Could not load collection from ZK: " + coll, e);
    } catch (InterruptedException e) {

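The last hunk turns the static `getCollectionLive(ZkStateReader, String)` helper into an instance method, so callers no longer pass the reader explicitly. A caller-side sketch of the change (the collection name is illustrative):

  // Before: static helper, reader passed explicitly.
  // DocCollection live = ZkStateReader.getCollectionLive(zkStateReader, "myCollection");

  // After: instance method on the reader itself.
  DocCollection live = zkStateReader.getCollectionLive("myCollection");
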
@@ -120,6 +120,7 @@ public class MiniSolrCloudCluster {
      "    <str name=\"zkACLProvider\">${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider}</str> \n" +
      "    <str name=\"pkiHandlerPrivateKeyPath\">${pkiHandlerPrivateKeyPath:cryptokeys/priv_key512_pkcs8.pem}</str> \n" +
      "    <str name=\"pkiHandlerPublicKeyPath\">${pkiHandlerPublicKeyPath:cryptokeys/pub_key512.der}</str> \n" +
      "    <str name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str> \n" +
      "  </solrcloud>\n" +
      // NOTE: this turns off the metrics collection unless overriden by a sysprop
      "  <metrics enabled=\"${metricsEnabled:false}\">\n" +

@@ -35,6 +35,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Predicate;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;

@@ -109,6 +110,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
    private Map<String, Object> clusterProperties = new HashMap<>();

    private boolean trackJettyMetrics;
    private boolean useDistributedClusterStateUpdate;

    /**
     * Create a builder

@@ -119,6 +121,8 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
    public Builder(int nodeCount, Path baseDir) {
      this.nodeCount = nodeCount;
      this.baseDir = baseDir;
      // By default the MiniSolrCloudCluster being built will randomly (seed based) decide which cluster update strategy to use
      this.useDistributedClusterStateUpdate = LuceneTestCase.random().nextInt(2) == 0;
    }

    /**

@@ -186,6 +190,48 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
      return this;
    }

    /**
     * This method makes the MiniSolrCloudCluster use the "other" cluster state update strategy than it normally would.
     * When some test classes call this method (and some don't) we make sure that a run of multiple tests with a single
     * seed will exercise both code lines (distributed updates and Overseer based updates) so regressions can be spotted
     * faster.<p>
     *
     * The real need is for a few tests covering reasonable use cases to call this method. If you're adding a new test,
     * you don't have to call it (but it's ok if you do).
     */
    public Builder useOtherClusterStateUpdateStrategy() {
      useDistributedClusterStateUpdate = !useDistributedClusterStateUpdate;
      return this;
    }

    /**
     * Force the cluster state update strategy to be either Overseer based or distributed. <b>This method can be useful when
     * debugging tests</b> failing in only one of the two modes to have all local runs exhibit the issue, as well obviously for
     * tests that are not compatible with one of the two modes.
     * <p>
     * If this method is not called, the strategy being used will be random if the configuration passed to the cluster
     * ({@code solr.xml} equivalent) contains a placeholder similar to:
     * <pre>
     * {@code
     * <solrcloud>
     *   ....
     *   <str name="distributedClusterStateUpdates">${solr.distributedClusterStateUpdates:false}</str>
     *   ....
     * </solrcloud>
     * }</pre>
     * For an example of a configuration supporting this setting, see {@link MiniSolrCloudCluster#DEFAULT_CLOUD_SOLR_XML}.
     * When a test sets a different {@code solr.xml} config (using {@link #withSolrXml}), if the config does not contain
     * the placeholder, the strategy will be defined by the value assigned to {@code useDistributedClusterStateUpdates}
     * in {@link org.apache.solr.core.CloudConfig.CloudConfigBuilder}.
     *
     * @param distributed When {@code true}, cluster state updates are handled in a distributed way by nodes. When
     *                    {@code false}, cluster state updates are handled by Overseer.
     */
    public Builder withDistributedClusterStateUpdates(boolean distributed) {
      useDistributedClusterStateUpdate = distributed;
      return this;
    }

    /**
     * Set a cluster property
     *

@@ -217,6 +263,14 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
     * @throws Exception if an error occurs on startup
     */
    public MiniSolrCloudCluster build() throws Exception {
      // This will have an impact on how the MiniSolrCloudCluster and therefore the test run if the config being
      // used does have the appropriate placeholder.
      // It is a good place to hard code true or false instead of useDistributedClusterStateUpdate to run all qualifying
      // tests with a given cluster state update strategy (non qualifying tests will use the default value assigned to
      // useDistributedClusterStateUpdates in org.apache.solr.core.CloudConfig.CloudConfigBuilder, so if you really want
      // ALL tests to run with a given strategy, patch it there too (and revert before commit!)
      System.setProperty("solr.distributedClusterStateUpdates", Boolean.toString(useDistributedClusterStateUpdate));

      JettyConfig jettyConfig = jettyConfigBuilder.build();
      MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(nodeCount, baseDir, solrxml, jettyConfig,
          null, securityJson, trackJettyMetrics);

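Putting the new builder options together, a test class could pin the strategy explicitly instead of relying on the seed-based default. Illustrative sketch only: `configureCluster`, `addConfig`, `configset` and `configure` are the usual SolrCloudTestCase/SolrTestCaseJ4 helpers, and the configset name is an assumption; only `withDistributedClusterStateUpdates` and `useOtherClusterStateUpdateStrategy` come from this change.

  @BeforeClass
  public static void setupCluster() throws Exception {
    configureCluster(2)
        .addConfig("conf", configset("cloud-minimal"))
        .withDistributedClusterStateUpdates(true) // or .useOtherClusterStateUpdateStrategy()
        .configure();
  }
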