mirror of https://github.com/apache/lucene.git
SOLR-12187: Replica should watch clusterstate and unload itself if its entry is removed
This commit is contained in:
parent
f7f12a51f3
commit
09db13f4f4
|
@ -164,6 +164,8 @@ Bug Fixes
|
||||||
|
|
||||||
* SOLR-10169: PeerSync will hit an NPE on no response errors when looking for fingerprint. (Erick Erickson)
|
* SOLR-10169: PeerSync will hit an NPE on no response errors when looking for fingerprint. (Erick Erickson)
|
||||||
|
|
||||||
|
* SOLR-12187: Replica should watch clusterstate and unload itself if its entry is removed (Cao Manh Dat)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
|
@ -65,6 +66,7 @@ import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrException.ErrorCode;
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
import org.apache.solr.common.cloud.BeforeReconnect;
|
import org.apache.solr.common.cloud.BeforeReconnect;
|
||||||
import org.apache.solr.common.cloud.ClusterState;
|
import org.apache.solr.common.cloud.ClusterState;
|
||||||
|
import org.apache.solr.common.cloud.CollectionStateWatcher;
|
||||||
import org.apache.solr.common.cloud.DefaultConnectionStrategy;
|
import org.apache.solr.common.cloud.DefaultConnectionStrategy;
|
||||||
import org.apache.solr.common.cloud.DefaultZkACLProvider;
|
import org.apache.solr.common.cloud.DefaultZkACLProvider;
|
||||||
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
|
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
|
||||||
|
@ -1033,42 +1035,39 @@ public class ZkController {
|
||||||
try {
|
try {
|
||||||
// pre register has published our down state
|
// pre register has published our down state
|
||||||
final String baseUrl = getBaseUrl();
|
final String baseUrl = getBaseUrl();
|
||||||
|
|
||||||
final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
|
final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
|
||||||
final String collection = cloudDesc.getCollectionName();
|
final String collection = cloudDesc.getCollectionName();
|
||||||
|
final String shardId = cloudDesc.getShardId();
|
||||||
final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
|
final String coreZkNodeName = cloudDesc.getCoreNodeName();
|
||||||
assert coreZkNodeName != null : "we should have a coreNodeName by now";
|
assert coreZkNodeName != null : "we should have a coreNodeName by now";
|
||||||
|
|
||||||
|
// check replica's existence in clusterstate first
|
||||||
|
try {
|
||||||
|
zkStateReader.waitForState(collection, Overseer.isLegacy(zkStateReader) ? 60000 : 100,
|
||||||
|
TimeUnit.MILLISECONDS, (liveNodes, collectionState) -> getReplicaOrNull(collectionState, shardId, coreZkNodeName) != null);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
throw new SolrException(ErrorCode.SERVER_ERROR, "Error registering SolrCore, timeout waiting for replica present in clusterstate");
|
||||||
|
}
|
||||||
|
Replica replica = getReplicaOrNull(zkStateReader.getClusterState().getCollectionOrNull(collection), shardId, coreZkNodeName);
|
||||||
|
if (replica == null) {
|
||||||
|
throw new SolrException(ErrorCode.SERVER_ERROR, "Error registering SolrCore, replica is removed from clusterstate");
|
||||||
|
}
|
||||||
|
|
||||||
ZkShardTerms shardTerms = getShardTerms(collection, cloudDesc.getShardId());
|
ZkShardTerms shardTerms = getShardTerms(collection, cloudDesc.getShardId());
|
||||||
|
|
||||||
// This flag is used for testing rolling updates and should be removed in SOLR-11812
|
// This flag is used for testing rolling updates and should be removed in SOLR-11812
|
||||||
boolean isRunningInNewLIR = "new".equals(desc.getCoreProperty("lirVersion", "new"));
|
boolean isRunningInNewLIR = "new".equals(desc.getCoreProperty("lirVersion", "new"));
|
||||||
if (isRunningInNewLIR && cloudDesc.getReplicaType() != Type.PULL) {
|
if (isRunningInNewLIR && replica.getType() != Type.PULL) {
|
||||||
shardTerms.registerTerm(coreZkNodeName);
|
shardTerms.registerTerm(coreZkNodeName);
|
||||||
}
|
}
|
||||||
String shardId = cloudDesc.getShardId();
|
|
||||||
Map<String,Object> props = new HashMap<>();
|
|
||||||
// we only put a subset of props into the leader node
|
|
||||||
props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
|
|
||||||
props.put(ZkStateReader.CORE_NAME_PROP, coreName);
|
|
||||||
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
|
|
||||||
|
|
||||||
log.debug("Register replica - core:{} address:{} collection:{} shard:{}",
|
log.debug("Register replica - core:{} address:{} collection:{} shard:{}",
|
||||||
coreName, baseUrl, cloudDesc.getCollectionName(), shardId);
|
coreName, baseUrl, collection, shardId);
|
||||||
|
|
||||||
ZkNodeProps leaderProps = new ZkNodeProps(props);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// If we're a preferred leader, insert ourselves at the head of the queue
|
// If we're a preferred leader, insert ourselves at the head of the queue
|
||||||
boolean joinAtHead = false;
|
boolean joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
|
||||||
final DocCollection docCollection = zkStateReader.getClusterState().getCollectionOrNull(collection);
|
if (replica.getType() != Type.PULL) {
|
||||||
Replica replica = (docCollection == null) ? null : docCollection.getReplica(coreZkNodeName);
|
|
||||||
if (replica != null) {
|
|
||||||
joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
|
|
||||||
}
|
|
||||||
//TODO WHy would replica be null?
|
|
||||||
if (replica == null || replica.getType() != Type.PULL) {
|
|
||||||
joinElection(desc, afterExpiration, joinAtHead);
|
joinElection(desc, afterExpiration, joinAtHead);
|
||||||
} else if (replica.getType() == Type.PULL) {
|
} else if (replica.getType() == Type.PULL) {
|
||||||
if (joinAtHead) {
|
if (joinAtHead) {
|
||||||
|
@ -1093,9 +1092,8 @@ public class ZkController {
|
||||||
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
|
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
|
||||||
log.debug("We are " + ourUrl + " and leader is " + leaderUrl);
|
log.debug("We are " + ourUrl + " and leader is " + leaderUrl);
|
||||||
boolean isLeader = leaderUrl.equals(ourUrl);
|
boolean isLeader = leaderUrl.equals(ourUrl);
|
||||||
Replica.Type replicaType = zkStateReader.getClusterState().getCollection(collection).getReplica(coreZkNodeName).getType();
|
assert !(isLeader && replica.getType() == Type.PULL) : "Pull replica became leader!";
|
||||||
assert !(isLeader && replicaType == Type.PULL): "Pull replica became leader!";
|
|
||||||
|
|
||||||
try (SolrCore core = cc.getCore(desc.getName())) {
|
try (SolrCore core = cc.getCore(desc.getName())) {
|
||||||
|
|
||||||
// recover from local transaction log and wait for it to complete before
|
// recover from local transaction log and wait for it to complete before
|
||||||
|
@ -1105,7 +1103,7 @@ public class ZkController {
|
||||||
// leader election perhaps?
|
// leader election perhaps?
|
||||||
|
|
||||||
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
|
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
|
||||||
boolean isTlogReplicaAndNotLeader = replicaType == Replica.Type.TLOG && !isLeader;
|
boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader;
|
||||||
if (isTlogReplicaAndNotLeader) {
|
if (isTlogReplicaAndNotLeader) {
|
||||||
String commitVersion = ReplicateFromLeader.getCommitVersion(core);
|
String commitVersion = ReplicateFromLeader.getCommitVersion(core);
|
||||||
if (commitVersion != null) {
|
if (commitVersion != null) {
|
||||||
|
@ -1138,23 +1136,40 @@ public class ZkController {
|
||||||
publish(desc, Replica.State.ACTIVE);
|
publish(desc, Replica.State.ACTIVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isRunningInNewLIR && replicaType != Type.PULL) {
|
if (isRunningInNewLIR && replica.getType() != Type.PULL) {
|
||||||
|
// the watcher is added to a set so multiple calls of this method will left only one watcher
|
||||||
shardTerms.addListener(new RecoveringCoreTermWatcher(core.getCoreDescriptor(), getCoreContainer()));
|
shardTerms.addListener(new RecoveringCoreTermWatcher(core.getCoreDescriptor(), getCoreContainer()));
|
||||||
}
|
}
|
||||||
core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
|
core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
|
||||||
|
} catch (Exception e) {
|
||||||
|
unregister(coreName, desc, false);
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure we have an update cluster state right away
|
// make sure we have an update cluster state right away
|
||||||
zkStateReader.forceUpdateCollection(collection);
|
zkStateReader.forceUpdateCollection(collection);
|
||||||
|
// the watcher is added to a set so multiple calls of this method will left only one watcher
|
||||||
|
zkStateReader.registerCollectionStateWatcher(cloudDesc.getCollectionName(),
|
||||||
|
new UnloadCoreOnDeletedWatcher(coreZkNodeName, shardId, desc.getName()));
|
||||||
return shardId;
|
return shardId;
|
||||||
} catch (Exception e) {
|
|
||||||
unregister(coreName, desc, false);
|
|
||||||
throw e;
|
|
||||||
} finally {
|
} finally {
|
||||||
MDCLoggingContext.clear();
|
MDCLoggingContext.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Replica getReplicaOrNull(DocCollection docCollection, String shard, String coreNodeName) {
|
||||||
|
if (docCollection == null) return null;
|
||||||
|
|
||||||
|
Slice slice = docCollection.getSlice(shard);
|
||||||
|
if (slice == null) return null;
|
||||||
|
|
||||||
|
Replica replica = slice.getReplica(coreNodeName);
|
||||||
|
if (replica == null) return null;
|
||||||
|
if (!getNodeName().equals(replica.getNodeName())) return null;
|
||||||
|
|
||||||
|
return replica;
|
||||||
|
}
|
||||||
|
|
||||||
public void startReplicationFromLeader(String coreName, boolean switchTransactionLog) throws InterruptedException {
|
public void startReplicationFromLeader(String coreName, boolean switchTransactionLog) throws InterruptedException {
|
||||||
log.info("{} starting background replication from leader", coreName);
|
log.info("{} starting background replication from leader", coreName);
|
||||||
ReplicateFromLeader replicateFromLeader = new ReplicateFromLeader(cc, coreName);
|
ReplicateFromLeader replicateFromLeader = new ReplicateFromLeader(cc, coreName);
|
||||||
|
@ -1359,11 +1374,7 @@ public class ZkController {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void publish(final CoreDescriptor cd, final Replica.State state) throws Exception {
|
public void publish(final CoreDescriptor cd, final Replica.State state) throws Exception {
|
||||||
publish(cd, state, true);
|
publish(cd, state, true, false);
|
||||||
}
|
|
||||||
|
|
||||||
public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState) throws Exception {
|
|
||||||
publish(cd, state, updateLastState, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1430,6 +1441,9 @@ public class ZkController {
|
||||||
props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
|
props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
|
||||||
props.put(ZkStateReader.COLLECTION_PROP, collection);
|
props.put(ZkStateReader.COLLECTION_PROP, collection);
|
||||||
props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString());
|
props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString());
|
||||||
|
if (!Overseer.isLegacy(zkStateReader)) {
|
||||||
|
props.put(ZkStateReader.FORCE_SET_STATE_PROP, "false");
|
||||||
|
}
|
||||||
if (numShards != null) {
|
if (numShards != null) {
|
||||||
props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
|
props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
|
||||||
}
|
}
|
||||||
|
@ -1521,7 +1535,6 @@ public class ZkController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CloudDescriptor cloudDescriptor = cd.getCloudDescriptor();
|
CloudDescriptor cloudDescriptor = cd.getCloudDescriptor();
|
||||||
zkStateReader.unregisterCore(cloudDescriptor.getCollectionName());
|
|
||||||
if (removeCoreFromZk) {
|
if (removeCoreFromZk) {
|
||||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||||
OverseerAction.DELETECORE.toLower(), ZkStateReader.CORE_NAME_PROP, coreName,
|
OverseerAction.DELETECORE.toLower(), ZkStateReader.CORE_NAME_PROP, coreName,
|
||||||
|
@ -1653,7 +1666,6 @@ public class ZkController {
|
||||||
"Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
|
"Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
|
||||||
"Registering watch for collection {}",
|
"Registering watch for collection {}",
|
||||||
collectionName);
|
collectionName);
|
||||||
zkStateReader.registerCore(collectionName);
|
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
log.error("", e);
|
log.error("", e);
|
||||||
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
|
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
|
||||||
|
@ -2707,6 +2719,56 @@ public class ZkController {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class UnloadCoreOnDeletedWatcher implements CollectionStateWatcher {
|
||||||
|
String coreNodeName;
|
||||||
|
String shard;
|
||||||
|
String coreName;
|
||||||
|
|
||||||
|
public UnloadCoreOnDeletedWatcher(String coreNodeName, String shard, String coreName) {
|
||||||
|
this.coreNodeName = coreNodeName;
|
||||||
|
this.shard = shard;
|
||||||
|
this.coreName = coreName;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
// synchronized due to SOLR-11535
|
||||||
|
public synchronized boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||||
|
if (getCoreContainer().getCoreDescriptor(coreName) == null) return true;
|
||||||
|
|
||||||
|
boolean replicaRemoved = getReplicaOrNull(collectionState, shard, coreNodeName) == null;
|
||||||
|
if (replicaRemoved) {
|
||||||
|
try {
|
||||||
|
log.info("Replica {} removed from clusterstate, remove it.", coreName);
|
||||||
|
getCoreContainer().unload(coreName, true, true, true);
|
||||||
|
} catch (SolrException e) {
|
||||||
|
if (!e.getMessage().contains("Cannot unload non-existent core")) {
|
||||||
|
// no need to log if the core was already unloaded
|
||||||
|
log.warn("Failed to unregister core:{}", coreName, e);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to unregister core:{}", coreName, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return replicaRemoved;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
UnloadCoreOnDeletedWatcher that = (UnloadCoreOnDeletedWatcher) o;
|
||||||
|
return Objects.equals(coreNodeName, that.coreNodeName) &&
|
||||||
|
Objects.equals(shard, that.shard) &&
|
||||||
|
Objects.equals(coreName, that.coreName);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
|
||||||
|
return Objects.hash(coreNodeName, shard, coreName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Thrown during leader initiated recovery process if current node is not leader
|
* Thrown during leader initiated recovery process if current node is not leader
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -222,22 +222,6 @@ public class ZkContainer {
|
||||||
public ZkController getZkController() {
|
public ZkController getZkController() {
|
||||||
return zkController;
|
return zkController;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void publishCoresAsDown(List<SolrCore> cores) {
|
|
||||||
|
|
||||||
for (SolrCore core : cores) {
|
|
||||||
try {
|
|
||||||
zkController.publish(core.getCoreDescriptor(), Replica.State.DOWN);
|
|
||||||
} catch (KeeperException e) {
|
|
||||||
ZkContainer.log.error("", e);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Thread.interrupted();
|
|
||||||
ZkContainer.log.error("", e);
|
|
||||||
} catch (Exception e) {
|
|
||||||
ZkContainer.log.error("", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.solr.api.Api;
|
||||||
import org.apache.solr.client.solrj.SolrResponse;
|
import org.apache.solr.client.solrj.SolrResponse;
|
||||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||||
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
|
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
|
||||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
|
||||||
import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard;
|
import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard;
|
||||||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
import org.apache.solr.client.solrj.response.RequestStatusState;
|
||||||
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
|
import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
|
||||||
|
@ -282,7 +281,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
* In SOLR-11739 we change the way the async IDs are checked to decide if one has
|
* In SOLR-11739 we change the way the async IDs are checked to decide if one has
|
||||||
* already been used or not. For backward compatibility, we continue to check in the
|
* already been used or not. For backward compatibility, we continue to check in the
|
||||||
* old way (meaning, in all the queues) for now. This extra check should be removed
|
* old way (meaning, in all the queues) for now. This extra check should be removed
|
||||||
* in Solr 9
|
* in Solr 9
|
||||||
*/
|
*/
|
||||||
private static final boolean CHECK_ASYNC_ID_BACK_COMPAT_LOCATIONS = true;
|
private static final boolean CHECK_ASYNC_ID_BACK_COMPAT_LOCATIONS = true;
|
||||||
|
|
||||||
|
@ -306,7 +305,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
}
|
}
|
||||||
|
|
||||||
NamedList<String> r = new NamedList<>();
|
NamedList<String> r = new NamedList<>();
|
||||||
|
|
||||||
if (CHECK_ASYNC_ID_BACK_COMPAT_LOCATIONS && (
|
if (CHECK_ASYNC_ID_BACK_COMPAT_LOCATIONS && (
|
||||||
coreContainer.getZkController().getOverseerCompletedMap().contains(asyncId) ||
|
coreContainer.getZkController().getOverseerCompletedMap().contains(asyncId) ||
|
||||||
coreContainer.getZkController().getOverseerFailureMap().contains(asyncId) ||
|
coreContainer.getZkController().getOverseerFailureMap().contains(asyncId) ||
|
||||||
|
@ -1162,26 +1161,15 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
|
|
||||||
// Wait till we have an active leader
|
// Wait till we have an active leader
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
for (int i = 0; i < 10; i++) {
|
for (int i = 0; i < 9; i++) {
|
||||||
ZkCoreNodeProps zombieLeaderProps = getZombieLeader(zkController, collectionName, sliceId);
|
Thread.sleep(5000);
|
||||||
if (zombieLeaderProps != null) {
|
clusterState = handler.coreContainer.getZkController().getClusterState();
|
||||||
log.warn("A replica {} on node {} won the leader election, but not exist in clusterstate, " +
|
|
||||||
"remove it and waiting for another round of election",
|
|
||||||
zombieLeaderProps.getCoreName(), zombieLeaderProps.getNodeName());
|
|
||||||
try (HttpSolrClient solrClient = new HttpSolrClient.Builder(zombieLeaderProps.getBaseUrl()).build()) {
|
|
||||||
CoreAdminRequest.unloadCore(zombieLeaderProps.getCoreName(), solrClient);
|
|
||||||
}
|
|
||||||
// waiting for another election round
|
|
||||||
i = 0;
|
|
||||||
}
|
|
||||||
clusterState = zkController.getClusterState();
|
|
||||||
collection = clusterState.getCollection(collectionName);
|
collection = clusterState.getCollection(collectionName);
|
||||||
slice = collection.getSlice(sliceId);
|
slice = collection.getSlice(sliceId);
|
||||||
if (slice.getLeader() != null && slice.getLeader().getState() == State.ACTIVE) {
|
if (slice.getLeader() != null && slice.getLeader().getState() == State.ACTIVE) {
|
||||||
success = true;
|
success = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Thread.sleep(5000);
|
|
||||||
log.warn("Force leader attempt {}. Waiting 5 secs for an active leader. State of the slice: {}", (i + 1), slice);
|
log.warn("Force leader attempt {}. Waiting 5 secs for an active leader. State of the slice: {}", (i + 1), slice);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1198,25 +1186,6 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Zombie leader is a replica won the election but does not exist in clusterstate
|
|
||||||
* @return null if the zombie leader does not exist
|
|
||||||
*/
|
|
||||||
private static ZkCoreNodeProps getZombieLeader(ZkController zkController, String collection, String shardId) {
|
|
||||||
try {
|
|
||||||
ZkCoreNodeProps leaderProps = zkController.getLeaderProps(collection, shardId, 1000);
|
|
||||||
DocCollection docCollection = zkController.getClusterState().getCollection(collection);
|
|
||||||
Replica replica = docCollection.getReplica(leaderProps.getNodeProps().getStr(ZkStateReader.CORE_NODE_NAME_PROP));
|
|
||||||
if (replica == null) return leaderProps;
|
|
||||||
if (!replica.getNodeName().equals(leaderProps.getNodeName())) {
|
|
||||||
return leaderProps;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} catch (Exception e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void waitForActiveCollection(String collectionName, CoreContainer cc, SolrResponse createCollResponse)
|
public static void waitForActiveCollection(String collectionName, CoreContainer cc, SolrResponse createCollResponse)
|
||||||
throws KeeperException, InterruptedException {
|
throws KeeperException, InterruptedException {
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.Semaphore;
|
import java.util.concurrent.Semaphore;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
@ -34,11 +35,13 @@ import org.apache.solr.client.solrj.request.CoreStatus;
|
||||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.cloud.CollectionStateWatcher;
|
||||||
import org.apache.solr.common.cloud.DocCollection;
|
import org.apache.solr.common.cloud.DocCollection;
|
||||||
import org.apache.solr.common.cloud.Replica;
|
import org.apache.solr.common.cloud.Replica;
|
||||||
import org.apache.solr.common.cloud.Slice;
|
import org.apache.solr.common.cloud.Slice;
|
||||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||||
import org.apache.solr.common.cloud.ZkStateReader;
|
import org.apache.solr.common.cloud.ZkStateReader;
|
||||||
|
import org.apache.solr.common.cloud.ZkStateReaderAccessor;
|
||||||
import org.apache.solr.common.util.TimeSource;
|
import org.apache.solr.common.util.TimeSource;
|
||||||
import org.apache.solr.common.util.Utils;
|
import org.apache.solr.common.util.Utils;
|
||||||
import org.apache.solr.core.ZkContainer;
|
import org.apache.solr.core.ZkContainer;
|
||||||
|
@ -86,12 +89,17 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
||||||
assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("state is 'active'"));
|
assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("state is 'active'"));
|
||||||
assertTrue("Data directory for " + replica.getName() + " should not have been deleted", Files.exists(dataDir));
|
assertTrue("Data directory for " + replica.getName() + " should not have been deleted", Files.exists(dataDir));
|
||||||
|
|
||||||
|
JettySolrRunner replicaJetty = cluster.getReplicaJetty(replica);
|
||||||
|
ZkStateReaderAccessor accessor = new ZkStateReaderAccessor(replicaJetty.getCoreContainer().getZkController().getZkStateReader());
|
||||||
|
Set<CollectionStateWatcher> watchers = accessor.getStateWatchers(collectionName);
|
||||||
CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName())
|
CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName())
|
||||||
.process(cluster.getSolrClient());
|
.process(cluster.getSolrClient());
|
||||||
waitForState("Expected replica " + replica.getName() + " to have been removed", collectionName, (n, c) -> {
|
waitForState("Expected replica " + replica.getName() + " to have been removed", collectionName, (n, c) -> {
|
||||||
Slice testShard = c.getSlice(shard.getName());
|
Slice testShard = c.getSlice(shard.getName());
|
||||||
return testShard.getReplica(replica.getName()) == null;
|
return testShard.getReplica(replica.getName()) == null;
|
||||||
});
|
});
|
||||||
|
// the core no longer watch collection state since it was removed
|
||||||
|
assertEquals(watchers.size() - 1, accessor.getStateWatchers(collectionName).size());
|
||||||
|
|
||||||
assertFalse("Data directory for " + replica.getName() + " should have been removed", Files.exists(dataDir));
|
assertFalse("Data directory for " + replica.getName() + " should have been removed", Files.exists(dataDir));
|
||||||
|
|
||||||
|
@ -165,8 +173,63 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
public void deleteReplicaFromClusterState() throws Exception {
|
||||||
|
deleteReplicaFromClusterState("true");
|
||||||
|
deleteReplicaFromClusterState("false");
|
||||||
|
CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, null).process(cluster.getSolrClient());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteReplicaFromClusterState(String legacyCloud) throws Exception {
|
||||||
|
CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, legacyCloud).process(cluster.getSolrClient());
|
||||||
|
final String collectionName = "deleteFromClusterState_"+legacyCloud;
|
||||||
|
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
|
||||||
|
.process(cluster.getSolrClient());
|
||||||
|
cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "1"));
|
||||||
|
cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2"));
|
||||||
|
cluster.getSolrClient().commit(collectionName);
|
||||||
|
|
||||||
|
Slice shard = getCollectionState(collectionName).getSlice("shard1");
|
||||||
|
Replica replica = getRandomReplica(shard);
|
||||||
|
JettySolrRunner replicaJetty = cluster.getReplicaJetty(replica);
|
||||||
|
ZkStateReaderAccessor accessor = new ZkStateReaderAccessor(replicaJetty.getCoreContainer().getZkController().getZkStateReader());
|
||||||
|
Set<CollectionStateWatcher> watchers = accessor.getStateWatchers(collectionName);
|
||||||
|
|
||||||
|
ZkNodeProps m = new ZkNodeProps(
|
||||||
|
Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
|
||||||
|
ZkStateReader.CORE_NAME_PROP, replica.getCoreName(),
|
||||||
|
ZkStateReader.NODE_NAME_PROP, replica.getNodeName(),
|
||||||
|
ZkStateReader.COLLECTION_PROP, collectionName,
|
||||||
|
ZkStateReader.CORE_NODE_NAME_PROP, replica.getName(),
|
||||||
|
ZkStateReader.BASE_URL_PROP, replica.getBaseUrl());
|
||||||
|
Overseer.getStateUpdateQueue(cluster.getZkClient()).offer(Utils.toJSON(m));
|
||||||
|
|
||||||
|
waitForState("Timeout waiting for replica get deleted", collectionName,
|
||||||
|
(liveNodes, collectionState) -> collectionState.getSlice("shard1").getReplicas().size() == 2);
|
||||||
|
|
||||||
|
TimeOut timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
||||||
|
timeOut.waitFor("Waiting for replica get unloaded", () ->
|
||||||
|
replicaJetty.getCoreContainer().getCoreDescriptor(replica.getCoreName()) == null
|
||||||
|
);
|
||||||
|
// the core no longer watch collection state since it was removed
|
||||||
|
timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
||||||
|
timeOut.waitFor("Waiting for watcher get removed", () ->
|
||||||
|
watchers.size() - 1 == accessor.getStateWatchers(collectionName).size()
|
||||||
|
);
|
||||||
|
|
||||||
|
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Slow
|
||||||
public void raceConditionOnDeleteAndRegisterReplica() throws Exception {
|
public void raceConditionOnDeleteAndRegisterReplica() throws Exception {
|
||||||
final String collectionName = "raceDeleteReplica";
|
raceConditionOnDeleteAndRegisterReplica("true");
|
||||||
|
raceConditionOnDeleteAndRegisterReplica("false");
|
||||||
|
CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, null).process(cluster.getSolrClient());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void raceConditionOnDeleteAndRegisterReplica(String legacyCloud) throws Exception {
|
||||||
|
CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, legacyCloud).process(cluster.getSolrClient());
|
||||||
|
final String collectionName = "raceDeleteReplica_"+legacyCloud;
|
||||||
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
|
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
|
||||||
.process(cluster.getSolrClient());
|
.process(cluster.getSolrClient());
|
||||||
waitForState("Expected 1x2 collections", collectionName, clusterShape(1, 2));
|
waitForState("Expected 1x2 collections", collectionName, clusterShape(1, 2));
|
||||||
|
@ -246,15 +309,16 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
|
||||||
ZkContainer.testing_beforeRegisterInZk = null;
|
ZkContainer.testing_beforeRegisterInZk = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (true) {
|
||||||
waitForState("Timeout for replica:"+replica1.getName()+" register itself as DOWN after failed to register", collectionName, (liveNodes, collectionState) -> {
|
try {
|
||||||
Slice shard = collectionState.getSlice("shard1");
|
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
|
||||||
Replica replica = shard.getReplica(replica1.getName());
|
.process(cluster.getSolrClient());
|
||||||
return replica != null && replica.getState() == DOWN;
|
break;
|
||||||
});
|
} catch (Exception e) {
|
||||||
|
// expected, when the node is not fully started
|
||||||
CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
|
Thread.sleep(500);
|
||||||
.process(cluster.getSolrClient());
|
}
|
||||||
|
}
|
||||||
waitForState("Expected 1x2 collections", collectionName, clusterShape(1, 2));
|
waitForState("Expected 1x2 collections", collectionName, clusterShape(1, 2));
|
||||||
|
|
||||||
String leaderJettyNodeName = leaderJetty.getNodeName();
|
String leaderJettyNodeName = leaderJetty.getNodeName();
|
||||||
|
|
|
@ -62,81 +62,6 @@ public class ForceLeaderTest extends HttpPartitionTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests that FORCELEADER can get an active leader even in the case there are a replica won the election but not present in clusterstate
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
@Slow
|
|
||||||
public void testZombieLeader() throws Exception {
|
|
||||||
String testCollectionName = "forceleader_zombie_leader_collection";
|
|
||||||
createCollection(testCollectionName, "conf1", 1, 3, 1);
|
|
||||||
cloudClient.setDefaultCollection(testCollectionName);
|
|
||||||
try {
|
|
||||||
List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, SHARD1, 1, 3, maxWaitSecsToSeeAllActive);
|
|
||||||
assertEquals("Expected 2 replicas for collection " + testCollectionName
|
|
||||||
+ " but found " + notLeaders.size() + "; clusterState: "
|
|
||||||
+ printClusterStateInfo(testCollectionName), 2, notLeaders.size());
|
|
||||||
List<JettySolrRunner> notLeaderJetties = notLeaders.stream().map(rep -> getJettyOnPort(getReplicaPort(rep)))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
|
|
||||||
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, SHARD1);
|
|
||||||
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
|
|
||||||
|
|
||||||
// remove leader from clusterstate
|
|
||||||
ZkNodeProps m = new ZkNodeProps(
|
|
||||||
Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
|
|
||||||
ZkStateReader.CORE_NAME_PROP, leader.getCoreName(),
|
|
||||||
ZkStateReader.NODE_NAME_PROP, leader.getNodeName(),
|
|
||||||
ZkStateReader.COLLECTION_PROP, testCollectionName,
|
|
||||||
ZkStateReader.CORE_NODE_NAME_PROP, leader.getName(),
|
|
||||||
ZkStateReader.BASE_URL_PROP, leader.getBaseUrl());
|
|
||||||
Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient()).offer(Utils.toJSON(m));
|
|
||||||
|
|
||||||
boolean restartOtherReplicas = random().nextBoolean();
|
|
||||||
log.info("Starting test with restartOtherReplicas:{}", restartOtherReplicas);
|
|
||||||
if (restartOtherReplicas) {
|
|
||||||
for (JettySolrRunner notLeaderJetty : notLeaderJetties) {
|
|
||||||
notLeaderJetty.stop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cloudClient.waitForState(testCollectionName, 30, TimeUnit.SECONDS,
|
|
||||||
(liveNodes, collectionState) -> collectionState.getReplicas().size() == 2);
|
|
||||||
|
|
||||||
if (restartOtherReplicas) {
|
|
||||||
for (JettySolrRunner notLeaderJetty : notLeaderJetties) {
|
|
||||||
notLeaderJetty.start();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info("Before forcing leader: " + cloudClient.getZkStateReader().getClusterState()
|
|
||||||
.getCollection(testCollectionName).getSlice(SHARD1));
|
|
||||||
doForceLeader(cloudClient, testCollectionName, SHARD1);
|
|
||||||
|
|
||||||
// By now we have an active leader. Wait for recoveries to begin
|
|
||||||
waitForRecoveriesToFinish(testCollectionName, cloudClient.getZkStateReader(), true);
|
|
||||||
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
|
|
||||||
log.info("After forcing leader: " + clusterState.getCollection(testCollectionName).getSlice(SHARD1));
|
|
||||||
|
|
||||||
assertNull("Expected zombie leader get deleted", leaderJetty.getCoreContainer().getCore(leader.getCoreName()));
|
|
||||||
Replica newLeader = clusterState.getCollectionOrNull(testCollectionName).getSlice(SHARD1).getLeader();
|
|
||||||
assertNotNull(newLeader);
|
|
||||||
assertEquals(State.ACTIVE, newLeader.getState());
|
|
||||||
|
|
||||||
int numActiveReplicas = getNumberOfActiveReplicas(clusterState, testCollectionName, SHARD1);
|
|
||||||
assertEquals(2, numActiveReplicas);
|
|
||||||
|
|
||||||
// Assert that indexing works again
|
|
||||||
sendDoc(1);
|
|
||||||
cloudClient.commit();
|
|
||||||
|
|
||||||
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
|
|
||||||
} finally {
|
|
||||||
log.info("Cleaning up after the test.");
|
|
||||||
// try to clean up
|
|
||||||
attemptCollectionDelete(cloudClient, testCollectionName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that FORCELEADER can get an active leader even only replicas with term lower than leader's term are live
|
* Tests that FORCELEADER can get an active leader even only replicas with term lower than leader's term are live
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -60,9 +60,6 @@ import org.slf4j.LoggerFactory;
|
||||||
public class MoveReplicaTest extends SolrCloudTestCase {
|
public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
|
|
||||||
private static ZkStateReaderAccessor accessor;
|
|
||||||
private static int overseerLeaderIndex;
|
|
||||||
|
|
||||||
// used by MoveReplicaHDFSTest
|
// used by MoveReplicaHDFSTest
|
||||||
protected boolean inPlaceMove = true;
|
protected boolean inPlaceMove = true;
|
||||||
|
|
||||||
|
@ -78,14 +75,12 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
JettySolrRunner jetty = cluster.getJettySolrRunner(i);
|
JettySolrRunner jetty = cluster.getJettySolrRunner(i);
|
||||||
if (jetty.getNodeName().equals(overseerLeader)) {
|
if (jetty.getNodeName().equals(overseerLeader)) {
|
||||||
overseerJetty = jetty;
|
overseerJetty = jetty;
|
||||||
overseerLeaderIndex = i;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (overseerJetty == null) {
|
if (overseerJetty == null) {
|
||||||
fail("no overseer leader!");
|
fail("no overseer leader!");
|
||||||
}
|
}
|
||||||
accessor = new ZkStateReaderAccessor(overseerJetty.getCoreContainer().getZkController().getZkStateReader());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getSolrXml() {
|
protected String getSolrXml() {
|
||||||
|
@ -137,8 +132,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<CollectionStateWatcher> watchers = new HashSet<>(accessor.getStateWatchers(coll));
|
|
||||||
|
|
||||||
int sourceNumCores = getNumOfCores(cloudClient, replica.getNodeName(), coll);
|
int sourceNumCores = getNumOfCores(cloudClient, replica.getNodeName(), coll);
|
||||||
int targetNumCores = getNumOfCores(cloudClient, targetNode, coll);
|
int targetNumCores = getNumOfCores(cloudClient, targetNode, coll);
|
||||||
|
|
||||||
|
@ -201,9 +194,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
|
|
||||||
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
||||||
|
|
||||||
Set<CollectionStateWatcher> newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
|
|
||||||
assertEquals(watchers, newWatchers);
|
|
||||||
|
|
||||||
moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
|
moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
|
||||||
moveReplica.setInPlaceMove(inPlaceMove);
|
moveReplica.setInPlaceMove(inPlaceMove);
|
||||||
moveReplica.process(cloudClient);
|
moveReplica.process(cloudClient);
|
||||||
|
@ -243,8 +233,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assertTrue("replica never fully recovered", recovered);
|
assertTrue("replica never fully recovered", recovered);
|
||||||
newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
|
|
||||||
assertEquals(watchers, newWatchers);
|
|
||||||
|
|
||||||
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
||||||
}
|
}
|
||||||
|
@ -258,8 +246,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
|
|
||||||
CloudSolrClient cloudClient = cluster.getSolrClient();
|
CloudSolrClient cloudClient = cluster.getSolrClient();
|
||||||
|
|
||||||
Set<CollectionStateWatcher> watchers = new HashSet<>(accessor.getStateWatchers(coll));
|
|
||||||
|
|
||||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, REPLICATION);
|
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, REPLICATION);
|
||||||
create.setAutoAddReplicas(false);
|
create.setAutoAddReplicas(false);
|
||||||
cloudClient.request(create);
|
cloudClient.request(create);
|
||||||
|
@ -303,9 +289,6 @@ public class MoveReplicaTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
assertFalse(success);
|
assertFalse(success);
|
||||||
|
|
||||||
Set<CollectionStateWatcher> newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
|
|
||||||
assertEquals(watchers, newWatchers);
|
|
||||||
|
|
||||||
log.info("--- current collection state: " + cloudClient.getZkStateReader().getClusterState().getCollection(coll));
|
log.info("--- current collection state: " + cloudClient.getZkStateReader().getClusterState().getCollection(coll));
|
||||||
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1572,8 +1572,12 @@ public class ZkStateReader implements Closeable {
|
||||||
return v;
|
return v;
|
||||||
});
|
});
|
||||||
for (CollectionStateWatcher watcher : watchers) {
|
for (CollectionStateWatcher watcher : watchers) {
|
||||||
if (watcher.onStateChanged(liveNodes, collectionState)) {
|
try {
|
||||||
removeCollectionStateWatcher(collection, watcher);
|
if (watcher.onStateChanged(liveNodes, collectionState)) {
|
||||||
|
removeCollectionStateWatcher(collection, watcher);
|
||||||
|
}
|
||||||
|
} catch (Throwable throwable) {
|
||||||
|
LOG.warn("Error on calling watcher", throwable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue