mirror of https://github.com/apache/lucene.git
SOLR-5233: The deleteshard collections API doesn't wait for the cluster state to update, can fail if some nodes of the deleted shard are down, and has incorrect logging.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522463 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
098f8b37f4
commit
247142e18c
|
@ -233,6 +233,10 @@ Bug Fixes
|
|||
documents in the same index segment had a value of true.
|
||||
(Robert Muir, hossman, yonik)
|
||||
|
||||
* SOLR-5233: The "deleteshard" collections API doesn't wait for cluster state to update,
|
||||
can fail if some nodes of the deleted shard are down, and has incorrect logging.
|
||||
(Christine Poerschke, shalin)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -598,10 +598,11 @@ public class Overseer {
|
|||
* Remove collection slice from cloudstate
|
||||
*/
|
||||
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
|
||||
|
||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||
|
||||
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
|
||||
|
||||
final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
||||
DocCollection coll = newCollections.get(collection);
|
||||
|
||||
|
|
|
@ -820,7 +820,8 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
} while (srsp != null);
|
||||
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection);
|
||||
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection,
|
||||
ZkStateReader.SHARD_ID_PROP, sliceId);
|
||||
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
|
||||
|
||||
// wait for a while until we don't see the shard
|
||||
|
@ -829,7 +830,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
boolean removed = false;
|
||||
while (System.currentTimeMillis() < timeout) {
|
||||
Thread.sleep(100);
|
||||
removed = zkStateReader.getClusterState().getSlice(collection, message.getStr("name")) == null;
|
||||
removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
|
||||
if (removed) {
|
||||
Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
|
||||
break;
|
||||
|
@ -837,15 +838,15 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
|||
}
|
||||
if (!removed) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Could not fully remove collection: " + collection + " shard: " + message.getStr("name"));
|
||||
"Could not fully remove collection: " + collection + " shard: " + sliceId);
|
||||
}
|
||||
|
||||
log.info("Successfully deleted collection " + collection + ", shard: " + message.getStr("name"));
|
||||
log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
|
||||
|
||||
} catch (SolrException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + message.getStr("name"), e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -329,11 +329,11 @@ public class CollectionsHandler extends RequestHandlerBase {
|
|||
private void handleDeleteShardAction(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp) throws InterruptedException, KeeperException {
|
||||
log.info("Deleting Shard : " + req.getParamString());
|
||||
String name = req.getParams().required().get("collection");
|
||||
String shard = req.getParams().required().get("shard");
|
||||
String name = req.getParams().required().get(ZkStateReader.COLLECTION_PROP);
|
||||
String shard = req.getParams().required().get(ZkStateReader.SHARD_ID_PROP);
|
||||
|
||||
Map<String,Object> props = new HashMap<String,Object>();
|
||||
props.put("collection", name);
|
||||
props.put(ZkStateReader.COLLECTION_PROP, name);
|
||||
props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
|
||||
props.put(ZkStateReader.SHARD_ID_PROP, shard);
|
||||
|
||||
|
|
|
@ -101,18 +101,18 @@ public class DeleteShardTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
deleteShard(SHARD1);
|
||||
|
||||
confirmShardDeletion();
|
||||
confirmShardDeletion(SHARD1);
|
||||
}
|
||||
|
||||
protected void confirmShardDeletion() throws SolrServerException, KeeperException,
|
||||
protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException,
|
||||
InterruptedException {
|
||||
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
|
||||
ClusterState clusterState = null;
|
||||
ClusterState clusterState = zkStateReader.getClusterState();
|
||||
int counter = 10;
|
||||
while (counter-- > 0) {
|
||||
zkStateReader.updateClusterState(true);
|
||||
clusterState = zkStateReader.getClusterState();
|
||||
if (clusterState.getSlice("collection1", SHARD1) == null) {
|
||||
if (clusterState.getSlice("collection1", shard) == null) {
|
||||
break;
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
|
|
Loading…
Reference in New Issue