mirror of https://github.com/apache/lucene.git
SOLR-11124: MoveReplicaCmd should skip deleting the old replica when its node is not live
This commit is contained in:
parent
227eeefcd0
commit
bd283c895f
|
@ -79,6 +79,8 @@ Optimizations
|
|||
* SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging.
|
||||
(Michael Braun via Christine Poerschke)
|
||||
|
||||
* SOLR-11124: MoveReplicaCmd should skip deleting the old replica when its node is not live (Cao Manh Dat)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -119,36 +119,41 @@ public class MoveReplicaCmd implements Cmd{
|
|||
|
||||
private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async,
|
||||
DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
|
||||
ZkNodeProps removeReplicasProps = new ZkNodeProps(
|
||||
COLLECTION_PROP, coll.getName(),
|
||||
SHARD_ID_PROP, slice.getName(),
|
||||
REPLICA_PROP, replica.getName()
|
||||
);
|
||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
|
||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
|
||||
if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
|
||||
NamedList deleteResult = new NamedList();
|
||||
ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
|
||||
if (deleteResult.get("failure") != null) {
|
||||
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
|
||||
coll.getName(), slice.getName(), replica.getName());
|
||||
log.warn(errorString);
|
||||
results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
|
||||
return;
|
||||
}
|
||||
|
||||
TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
|
||||
while (!timeOut.hasTimedOut()) {
|
||||
coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
|
||||
if (coll.getReplica(replica.getName()) != null) {
|
||||
Thread.sleep(100);
|
||||
} else {
|
||||
break;
|
||||
String skipCreateReplicaInClusterState = "true";
|
||||
if (clusterState.getLiveNodes().contains(replica.getNodeName())) {
|
||||
skipCreateReplicaInClusterState = "false";
|
||||
ZkNodeProps removeReplicasProps = new ZkNodeProps(
|
||||
COLLECTION_PROP, coll.getName(),
|
||||
SHARD_ID_PROP, slice.getName(),
|
||||
REPLICA_PROP, replica.getName()
|
||||
);
|
||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
|
||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
|
||||
if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
|
||||
NamedList deleteResult = new NamedList();
|
||||
ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
|
||||
if (deleteResult.get("failure") != null) {
|
||||
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
|
||||
coll.getName(), slice.getName(), replica.getName());
|
||||
log.warn(errorString);
|
||||
results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (timeOut.hasTimedOut()) {
|
||||
results.add("failure", "Still see deleted replica in clusterstate!");
|
||||
return;
|
||||
|
||||
TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
|
||||
while (!timeOut.hasTimedOut()) {
|
||||
coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
|
||||
if (coll.getReplica(replica.getName()) != null) {
|
||||
Thread.sleep(100);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (timeOut.hasTimedOut()) {
|
||||
results.add("failure", "Still see deleted replica in clusterstate!");
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR);
|
||||
|
@ -158,6 +163,7 @@ public class MoveReplicaCmd implements Cmd{
|
|||
CoreAdminParams.NODE, targetNode,
|
||||
CoreAdminParams.CORE_NODE_NAME, replica.getName(),
|
||||
CoreAdminParams.NAME, replica.getCoreName(),
|
||||
SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
|
||||
CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
|
||||
CoreAdminParams.DATA_DIR, dataDir);
|
||||
if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
|
||||
|
|
Loading…
Reference in New Issue