SOLR-11124: MoveReplicaCmd should skip deleting the old replica if its node is not live

This commit is contained in:
Cao Manh Dat 2017-07-20 14:39:30 +07:00
parent 227eeefcd0
commit bd283c895f
2 changed files with 37 additions and 29 deletions

View File

@ -79,6 +79,8 @@ Optimizations
* SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging. * SOLR-10985: Remove unnecessary toString() calls in solr-core's search package's debug logging.
(Michael Braun via Christine Poerschke) (Michael Braun via Christine Poerschke)
* SOLR-11124: MoveReplicaCmd should skip deleting the old replica if its node is not live (Cao Manh Dat)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -119,36 +119,41 @@ public class MoveReplicaCmd implements Cmd{
private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async, private void moveHdfsReplica(ClusterState clusterState, NamedList results, String dataDir, String targetNode, String async,
DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception { DocCollection coll, Replica replica, Slice slice, int timeout) throws Exception {
ZkNodeProps removeReplicasProps = new ZkNodeProps( String skipCreateReplicaInClusterState = "true";
COLLECTION_PROP, coll.getName(), if (clusterState.getLiveNodes().contains(replica.getNodeName())) {
SHARD_ID_PROP, slice.getName(), skipCreateReplicaInClusterState = "false";
REPLICA_PROP, replica.getName() ZkNodeProps removeReplicasProps = new ZkNodeProps(
); COLLECTION_PROP, coll.getName(),
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false); SHARD_ID_PROP, slice.getName(),
removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false); REPLICA_PROP, replica.getName()
if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async); );
NamedList deleteResult = new NamedList(); removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_DATA_DIR, false);
ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null); removeReplicasProps.getProperties().put(CoreAdminParams.DELETE_INDEX, false);
if (deleteResult.get("failure") != null) { if(async!=null) removeReplicasProps.getProperties().put(ASYNC, async);
String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s", NamedList deleteResult = new NamedList();
coll.getName(), slice.getName(), replica.getName()); ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
log.warn(errorString); if (deleteResult.get("failure") != null) {
results.add("failure", errorString + ", because of : " + deleteResult.get("failure")); String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s shard=%s name=%s",
return; coll.getName(), slice.getName(), replica.getName());
} log.warn(errorString);
results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS); return;
while (!timeOut.hasTimedOut()) {
coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
if (coll.getReplica(replica.getName()) != null) {
Thread.sleep(100);
} else {
break;
} }
}
if (timeOut.hasTimedOut()) { TimeOut timeOut = new TimeOut(20L, TimeUnit.SECONDS);
results.add("failure", "Still see deleted replica in clusterstate!"); while (!timeOut.hasTimedOut()) {
return; coll = ocmh.zkStateReader.getClusterState().getCollection(coll.getName());
if (coll.getReplica(replica.getName()) != null) {
Thread.sleep(100);
} else {
break;
}
}
if (timeOut.hasTimedOut()) {
results.add("failure", "Still see deleted replica in clusterstate!");
return;
}
} }
String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR); String ulogDir = replica.getStr(CoreAdminParams.ULOG_DIR);
@ -158,6 +163,7 @@ public class MoveReplicaCmd implements Cmd{
CoreAdminParams.NODE, targetNode, CoreAdminParams.NODE, targetNode,
CoreAdminParams.CORE_NODE_NAME, replica.getName(), CoreAdminParams.CORE_NODE_NAME, replica.getName(),
CoreAdminParams.NAME, replica.getCoreName(), CoreAdminParams.NAME, replica.getCoreName(),
SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)), CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
CoreAdminParams.DATA_DIR, dataDir); CoreAdminParams.DATA_DIR, dataDir);
if(async!=null) addReplicasProps.getProperties().put(ASYNC, async); if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);