mirror of https://github.com/apache/lucene.git
SOLR-5233: The "deleteshard" collections API doesn't wait for the cluster state to update, can fail if some nodes of the deleted shard are down, and has incorrect logging.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1522463 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
098f8b37f4
commit
247142e18c
|
@ -233,6 +233,10 @@ Bug Fixes
|
||||||
documents in the same index segment had a value of true.
|
documents in the same index segment had a value of true.
|
||||||
(Robert Muir, hossman, yonik)
|
(Robert Muir, hossman, yonik)
|
||||||
|
|
||||||
|
* SOLR-5233: The "deleteshard" collections API doesn't wait for cluster state to update,
|
||||||
|
can fail if some nodes of the deleted shard are down, and has incorrect logging.
|
||||||
|
(Christine Poerschke, shalin)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -598,10 +598,11 @@ public class Overseer {
|
||||||
* Remove collection slice from cloudstate
|
* Remove collection slice from cloudstate
|
||||||
*/
|
*/
|
||||||
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
|
private ClusterState removeShard(final ClusterState clusterState, ZkNodeProps message) {
|
||||||
|
|
||||||
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
|
||||||
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
final String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
|
||||||
|
|
||||||
|
log.info("Removing collection: " + collection + " shard: " + sliceId + " from clusterstate");
|
||||||
|
|
||||||
final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
final Map<String, DocCollection> newCollections = new LinkedHashMap<String,DocCollection>(clusterState.getCollectionStates()); // shallow copy
|
||||||
DocCollection coll = newCollections.get(collection);
|
DocCollection coll = newCollections.get(collection);
|
||||||
|
|
||||||
|
|
|
@ -820,7 +820,8 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
||||||
} while (srsp != null);
|
} while (srsp != null);
|
||||||
|
|
||||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||||
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection);
|
Overseer.REMOVESHARD, ZkStateReader.COLLECTION_PROP, collection,
|
||||||
|
ZkStateReader.SHARD_ID_PROP, sliceId);
|
||||||
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
|
Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
|
||||||
|
|
||||||
// wait for a while until we don't see the shard
|
// wait for a while until we don't see the shard
|
||||||
|
@ -829,7 +830,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
||||||
boolean removed = false;
|
boolean removed = false;
|
||||||
while (System.currentTimeMillis() < timeout) {
|
while (System.currentTimeMillis() < timeout) {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
removed = zkStateReader.getClusterState().getSlice(collection, message.getStr("name")) == null;
|
removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
|
||||||
if (removed) {
|
if (removed) {
|
||||||
Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
|
Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
|
||||||
break;
|
break;
|
||||||
|
@ -837,15 +838,15 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
|
||||||
}
|
}
|
||||||
if (!removed) {
|
if (!removed) {
|
||||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||||
"Could not fully remove collection: " + collection + " shard: " + message.getStr("name"));
|
"Could not fully remove collection: " + collection + " shard: " + sliceId);
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Successfully deleted collection " + collection + ", shard: " + message.getStr("name"));
|
log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
|
||||||
|
|
||||||
} catch (SolrException e) {
|
} catch (SolrException e) {
|
||||||
throw e;
|
throw e;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + message.getStr("name"), e);
|
throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -329,11 +329,11 @@ public class CollectionsHandler extends RequestHandlerBase {
|
||||||
private void handleDeleteShardAction(SolrQueryRequest req,
|
private void handleDeleteShardAction(SolrQueryRequest req,
|
||||||
SolrQueryResponse rsp) throws InterruptedException, KeeperException {
|
SolrQueryResponse rsp) throws InterruptedException, KeeperException {
|
||||||
log.info("Deleting Shard : " + req.getParamString());
|
log.info("Deleting Shard : " + req.getParamString());
|
||||||
String name = req.getParams().required().get("collection");
|
String name = req.getParams().required().get(ZkStateReader.COLLECTION_PROP);
|
||||||
String shard = req.getParams().required().get("shard");
|
String shard = req.getParams().required().get(ZkStateReader.SHARD_ID_PROP);
|
||||||
|
|
||||||
Map<String,Object> props = new HashMap<String,Object>();
|
Map<String,Object> props = new HashMap<String,Object>();
|
||||||
props.put("collection", name);
|
props.put(ZkStateReader.COLLECTION_PROP, name);
|
||||||
props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
|
props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.DELETESHARD);
|
||||||
props.put(ZkStateReader.SHARD_ID_PROP, shard);
|
props.put(ZkStateReader.SHARD_ID_PROP, shard);
|
||||||
|
|
||||||
|
|
|
@ -101,18 +101,18 @@ public class DeleteShardTest extends AbstractFullDistribZkTestBase {
|
||||||
|
|
||||||
deleteShard(SHARD1);
|
deleteShard(SHARD1);
|
||||||
|
|
||||||
confirmShardDeletion();
|
confirmShardDeletion(SHARD1);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void confirmShardDeletion() throws SolrServerException, KeeperException,
|
protected void confirmShardDeletion(String shard) throws SolrServerException, KeeperException,
|
||||||
InterruptedException {
|
InterruptedException {
|
||||||
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
|
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
|
||||||
ClusterState clusterState = null;
|
ClusterState clusterState = zkStateReader.getClusterState();
|
||||||
int counter = 10;
|
int counter = 10;
|
||||||
while (counter-- > 0) {
|
while (counter-- > 0) {
|
||||||
zkStateReader.updateClusterState(true);
|
zkStateReader.updateClusterState(true);
|
||||||
clusterState = zkStateReader.getClusterState();
|
clusterState = zkStateReader.getClusterState();
|
||||||
if (clusterState.getSlice("collection1", SHARD1) == null) {
|
if (clusterState.getSlice("collection1", shard) == null) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
|
|
Loading…
Reference in New Issue