mirror of https://github.com/apache/lucene.git
SOLR-11124: MoveReplicaCmd should skip deleting the old replica when its node is not live
This commit is contained in:
parent
227eeefcd0
commit
bd283c895f
|
@ -79,6 +79,8 @@ Optimizations
|
||||||
* SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging.
|
* SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging.
|
||||||
(Michael Braun via Christine Poerschke)
|
(Michael Braun via Christine Poerschke)
|
||||||
|
|
||||||
|
* SOLR-11124: MoveReplicaCmd should skip deleting the old replica when its node is not live (Cao Manh Dat)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -119,36 +119,41 @@ public class MoveReplicaCmd implements Cmd{
|
||||||
|
|
||||||
private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async,
|
private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async,
|
||||||
DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
|
DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
|
||||||
ZkNodeProps removeReplicasProps = new ZkNodeProps(
|
String skipCreateReplicaInClusterState = "true";
|
||||||
COLLECTION_PROP, coll.getName(),
|
if (clusterState.getLiveNodes().contains(replica.getNodeName())) {
|
||||||
SHARD_ID_PROP, slice.getName(),
|
skipCreateReplicaInClusterState = "false";
|
||||||
REPLICA_PROP, replica.getName()
|
ZkNodeProps removeReplicasProps = new ZkNodeProps(
|
||||||
);
|
COLLECTION_PROP, coll.getName(),
|
||||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
|
SHARD_ID_PROP, slice.getName(),
|
||||||
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
|
REPLICA_PROP, replica.getName()
|
||||||
if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
|
);
|
||||||
NamedList deleteResult = new NamedList();
|
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
|
||||||
ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
|
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
|
||||||
if (deleteResult.get("failure") != null) {
|
if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
|
||||||
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
|
NamedList deleteResult = new NamedList();
|
||||||
coll.getName(), slice.getName(), replica.getName());
|
ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
|
||||||
log.warn(errorString);
|
if (deleteResult.get("failure") != null) {
|
||||||
results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
|
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
|
||||||
return;
|
coll.getName(), slice.getName(), replica.getName());
|
||||||
}
|
log.warn(errorString);
|
||||||
|
results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
|
||||||
TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
|
return;
|
||||||
while (!timeOut.hasTimedOut()) {
|
|
||||||
coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
|
|
||||||
if (coll.getReplica(replica.getName()) != null) {
|
|
||||||
Thread.sleep(100);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (timeOut.hasTimedOut()) {
|
TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
|
||||||
results.add("failure", "Still see deleted replica in clusterstate!");
|
while (!timeOut.hasTimedOut()) {
|
||||||
return;
|
coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
|
||||||
|
if (coll.getReplica(replica.getName()) != null) {
|
||||||
|
Thread.sleep(100);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (timeOut.hasTimedOut()) {
|
||||||
|
results.add("failure", "Still see deleted replica in clusterstate!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR);
|
String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR);
|
||||||
|
@ -158,6 +163,7 @@ public class MoveReplicaCmd implements Cmd{
|
||||||
CoreAdminParams.NODE, targetNode,
|
CoreAdminParams.NODE, targetNode,
|
||||||
CoreAdminParams.CORE_NODE_NAME, replica.getName(),
|
CoreAdminParams.CORE_NODE_NAME, replica.getName(),
|
||||||
CoreAdminParams.NAME, replica.getCoreName(),
|
CoreAdminParams.NAME, replica.getCoreName(),
|
||||||
|
SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
|
||||||
CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
|
CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
|
||||||
CoreAdminParams.DATA_DIR, dataDir);
|
CoreAdminParams.DATA_DIR, dataDir);
|
||||||
if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
|
if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
|
||||||
|
|
Loading…
Reference in New Issue