SOLR-5233: The deleteshard collections API doesn't wait for the cluster state to update, can fail if some nodes of the deleted shard are down, and has incorrect logging.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522463 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2013-09-12 07:14:10 +00:00
parent 098f8b37f4
commit 247142e18c
5 changed files with 19 additions and 13 deletions

View File

@ -233,6 +233,10 @@ Bug Fixes
documents in the same index segment had a value of true.
(Robert Muir, hossman, yonik)
* SOLR-5233: The "deleteshard" collections API doesn't wait for the cluster state to update,
can fail if some nodes of the deleted shard are down, and has incorrect logging.
(Christine Poerschke, shalin)
Optimizations
----------------------

View File

@ -598,10 +598,11 @@ public class Overseer {
* Remove collection slice from cloudstate * Remove collection slice from cloudstate
*/ */
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) { private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP); final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
DocCollection coll = newCollections.get(collection); DocCollection coll = newCollections.get(collection);

View File

@ -820,7 +820,8 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
} while (srsp != null); } while (srsp != null);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection); Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection,
ZkStateReader.SHARD_ID_PROP, sliceId);
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m)); Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
// wait for a while until we don't see the shard // wait for a while until we don't see the shard
@ -829,7 +830,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
boolean removed = false; boolean removed = false;
while (System.currentTimeMillis() < timeout) { while (System.currentTimeMillis() < timeout) {
Thread.sleep(100); Thread.sleep(100);
removed = zkStateReader.getClusterState().getSlice(collection, message.getStr("name")) == null; removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
if (removed) { if (removed) {
Thread.sleep(100); // just a bit of time so it's more likely other readers see on return Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
break; break;
@ -837,15 +838,15 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
} }
if (!removed) { if (!removed) {
throw new SolrException(ErrorCode.SERVER_ERROR, throw new SolrException(ErrorCode.SERVER_ERROR,
"Could not fully remove collection: " + collection + " shard: " + message.getStr("name")); "Could not fully remove collection: " + collection + " shard: " + sliceId);
} }
log.info("Successfully deleted collection " + collection + ", shard: " + message.getStr("name")); log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
} catch (SolrException e) { } catch (SolrException e) {
throw e; throw e;
} catch (Exception e) { } catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + message.getStr("name"), e); throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
} }
} }

View File

@ -329,11 +329,11 @@ public class CollectionsHandler extends RequestHandlerBase {
private void handleDeleteShardAction(SolrQueryRequest req, private void handleDeleteShardAction(SolrQueryRequest req,
SolrQueryResponse rsp) throws InterruptedException, KeeperException { SolrQueryResponse rsp) throws InterruptedException, KeeperException {
log.info("Deleting Shard : " + req.getParamString()); log.info("Deleting Shard : " + req.getParamString());
String name = req.getParams().required().get("collection"); String name = req.getParams().required().get(ZkStateReader.COLLECTION_PROP);
String shard = req.getParams().required().get("shard"); String shard = req.getParams().required().get(ZkStateReader.SHARD_ID_PROP);
Map<String,Object> props = new HashMap<String,Object>(); Map<String,Object> props = new HashMap<String,Object>();
props.put("collection", name); props.put(ZkStateReader.COLLECTION_PROP, name);
props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD); props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
props.put(ZkStateReader.SHARD_ID_PROP, shard); props.put(ZkStateReader.SHARD_ID_PROP, shard);

View File

@ -101,18 +101,18 @@ public class DeleteShardTest extends AbstractFullDistribZkTestBase {
deleteShard(SHARD1); deleteShard(SHARD1);
confirmShardDeletion(); confirmShardDeletion(SHARD1);
} }
protected void confirmShardDeletion() throws SolrServerException, KeeperException, protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException,
InterruptedException { InterruptedException {
ZkStateReader zkStateReader = cloudClient.getZkStateReader(); ZkStateReader zkStateReader = cloudClient.getZkStateReader();
ClusterState clusterState = null; ClusterState clusterState = zkStateReader.getClusterState();
int counter = 10; int counter = 10;
while (counter-- > 0) { while (counter-- > 0) {
zkStateReader.updateClusterState(true); zkStateReader.updateClusterState(true);
clusterState = zkStateReader.getClusterState(); clusterState = zkStateReader.getClusterState();
if (clusterState.getSlice("collection1", SHARD1) == null) { if (clusterState.getSlice("collection1", shard) == null) {
break; break;
} }
Thread.sleep(1000); Thread.sleep(1000);