SOLR-5233: The deleteshard collections API doesn't wait for cluster state to update, can fail if some nodes of the deleted shard were down and had incorrect logging.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522463 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2013-09-12 07:14:10 +00:00
parent 098f8b37f4
commit 247142e18c
5 changed files with 19 additions and 13 deletions

View File

@ -233,6 +233,10 @@ Bug Fixes
documents in the same index segment had a value of true.
(Robert Muir, hossman, yonik)
* SOLR-5233: The "deleteshard" collections API doesn't wait for cluster state to update,
can fail if some nodes of the deleted shard were down and had incorrect logging.
(Christine Poerschke, shalin)
Optimizations
----------------------

View File

@ -598,10 +598,11 @@ public class Overseer {
* Remove collection slice from cloudstate
*/
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
DocCollection coll = newCollections.get(collection);

View File

@ -820,7 +820,8 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
} while (srsp != null);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection);
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.SHARD_ID_PROP, sliceId);
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
// wait for a while until we don't see the shard
@ -829,7 +830,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
boolean removed = false;
while (System.currentTimeMillis() < timeout) {
Thread.sleep(100);
removed = zkStateReader.getClusterState().getSlice(collection, message.getStr("name")) == null;
removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
if (removed) {
Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
break;
@ -837,15 +838,15 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
}
if (!removed) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Could not fully remove collection: " + collection + " shard: " + message.getStr("name"));
"Could not fully remove collection: " + collection + " shard: " + sliceId);
}
log.info("Successfully deleted collection " + collection + ", shard: " + message.getStr("name"));
log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
} catch (SolrException e) {
throw e;
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + message.getStr("name"), e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
}
}

View File

@ -329,11 +329,11 @@ public class CollectionsHandler extends RequestHandlerBase {
private void handleDeleteShardAction(SolrQueryRequest req,
SolrQueryResponse rsp) throws InterruptedException, KeeperException {
log.info("Deleting Shard : " + req.getParamString());
String name = req.getParams().required().get("collection");
String shard = req.getParams().required().get("shard");
String name = req.getParams().required().get(ZkStateReader.COLLECTION_PROP);
String shard = req.getParams().required().get(ZkStateReader.SHARD_ID_PROP);
Map<String,Object> props = new HashMap<String,Object>();
props.put("collection", name);
props.put(ZkStateReader.COLLECTION_PROP, name);
props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
props.put(ZkStateReader.SHARD_ID_PROP, shard);

View File

@ -101,18 +101,18 @@ public class DeleteShardTest extends AbstractFullDistribZkTestBase {
deleteShard(SHARD1);
confirmShardDeletion();
confirmShardDeletion(SHARD1);
}
protected void confirmShardDeletion() throws SolrServerException, KeeperException,
protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException,
InterruptedException {
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
ClusterState clusterState = null;
ClusterState clusterState = zkStateReader.getClusterState();
int counter = 10;
while (counter-- > 0) {
zkStateReader.updateClusterState(true);
clusterState = zkStateReader.getClusterState();
if (clusterState.getSlice("collection1", SHARD1) == null) {
if (clusterState.getSlice("collection1", shard) == null) {
break;
}
Thread.sleep(1000);