SOLR-5135: Harden Collection API deletion of /collections/$collection ZooKeeper node

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1513864 13f79535-47bb-0310-9956-ffa450edef68
Mark Robert Miller 2013-08-14 13:11:14 +00:00
parent b03231693d
commit 97b418391f
4 changed files with 102 additions and 51 deletions

solr/CHANGES.txt (View File)

@@ -120,6 +120,9 @@ Bug Fixes
 * SOLR-5133: HdfsUpdateLog can fail to close a FileSystem instance if init
   is called more than once. (Mark Miller)
+
+* SOLR-5135: Harden Collection API deletion of /collections/$collection
+  ZooKeeper node. (Mark Miller)
 
 Optimizations
 ----------------------
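
The hardening this entry describes boils down to one pattern, applied in OverseerCollectionProcessor.deleteCollection below: wrap the delete work in try/finally and, in the finally block, recursively remove the collection's /collections/<name> znode if it still exists, logging rather than rethrowing cleanup failures so they cannot mask the primary outcome. As a point of reference only, here is a minimal self-contained sketch of that pattern against the raw org.apache.zookeeper.ZooKeeper client; unloadCores and deleteRecursively are hypothetical stand-ins for the CoreAdmin UNLOAD fan-out and SolrZkClient.clean in the actual patch.

import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;

class CollectionCleanupSketch {

  // Delete a collection, then guarantee its znode is gone no matter what.
  void deleteCollection(ZooKeeper zk, String collection)
      throws KeeperException, InterruptedException {
    final String path = "/collections/" + collection;
    try {
      unloadCores(collection); // stand-in for the per-replica UNLOAD requests
    } finally {
      try {
        if (zk.exists(path, false) != null) {
          deleteRecursively(zk, path);
        }
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the flag, as the patch does
      } catch (KeeperException e) {
        // the patch logs and continues: cleanup failure must not hide the original result
      }
    }
  }

  // ZooKeeper deletes only empty nodes, so remove children depth-first.
  void deleteRecursively(ZooKeeper zk, String path)
      throws KeeperException, InterruptedException {
    for (String child : zk.getChildren(path, false)) {
      deleteRecursively(zk, path + "/" + child);
    }
    zk.delete(path, -1); // version -1 matches any node version
  }

  void unloadCores(String collection) {}
}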

solr/core/src/java/org/apache/solr/cloud/Overseer.java (View File)

@@ -17,8 +17,17 @@ package org.apache.solr.cloud;
  * the License.
  */
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClosableThread;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -36,16 +45,6 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
 /**
  * Cluster leader. Responsible node assignments, cluster state file?
  */
@@ -295,17 +294,6 @@ public class Overseer {
       final String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
       assert collection.length() > 0 : message;
 
-      try {
-        if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
-          log.warn("Could not find collection node for " + collection + ", skipping publish state");
-        }
-      } catch (KeeperException e) {
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        throw new SolrException(ErrorCode.SERVER_ERROR, e);
-      }
-
       String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP);
       if (coreNodeName == null) {
         coreNodeName = getAssignedCoreNodeName(state, message);

solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (View File)

@@ -232,16 +232,21 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
     return new OverseerSolrResponse(results);
   }
 
-  private void deleteCollection(ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
+  private void deleteCollection(ZkNodeProps message, NamedList results)
+      throws KeeperException, InterruptedException {
+    String collection = message.getStr("name");
+    try {
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
     params.set(CoreAdminParams.DELETE_INSTANCE_DIR, true);
     params.set(CoreAdminParams.DELETE_DATA_DIR, true);
-    collectionCmd(zkStateReader.getClusterState(), message, params, results, null);
+    collectionCmd(zkStateReader.getClusterState(), message, params, results,
+        null);
 
     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-        Overseer.REMOVECOLLECTION, "name", message.getStr("name"));
-    Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
+        Overseer.REMOVECOLLECTION, "name", collection);
+    Overseer.getInQueue(zkStateReader.getZkClient()).offer(
+        ZkStateReader.toJSON(m));
 
     // wait for a while until we don't see the collection
     long now = System.currentTimeMillis();
@@ -249,14 +254,35 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
     boolean removed = false;
     while (System.currentTimeMillis() < timeout) {
       Thread.sleep(100);
-      removed = !zkStateReader.getClusterState().getCollections().contains(message.getStr("name"));
+      removed = !zkStateReader.getClusterState().getCollections()
+          .contains(message.getStr("name"));
       if (removed) {
-        Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
+        Thread.sleep(100); // just a bit of time so it's more likely other
+                           // readers see on return
         break;
       }
     }
     if (!removed) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + message.getStr("name"));
+      throw new SolrException(ErrorCode.SERVER_ERROR,
+          "Could not fully remove collection: " + message.getStr("name"));
     }
+    } finally {
+      try {
+        if (zkStateReader.getZkClient().exists(
+            ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
+          zkStateReader.getZkClient().clean(
+              ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection);
+        }
+      } catch (InterruptedException e) {
+        SolrException.log(log, "Cleaning up collection in zk was interrupted:"
+            + collection, e);
+        Thread.currentThread().interrupt();
+      } catch (KeeperException e) {
+        SolrException.log(log, "Problem cleaning up collection in zk:"
+            + collection, e);
+      }
+    }
   }
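
The wait loop above is worth a note. Removal is asynchronous: deleteCollection only enqueues a REMOVECOLLECTION message for the Overseer, so it must poll the cluster state until the collection vanishes or a deadline passes, and it sleeps once more after a successful check so concurrent cluster-state readers are likely to observe the removal too. A standalone sketch of that wait loop, with the predicate and timeout as placeholders rather than Solr API:

import java.util.concurrent.Callable;

final class WaitForRemoval {

  // Poll every 100 ms until stillPresent() turns false or timeoutMs elapses.
  // Returns true if the resource was observed to be gone in time.
  static boolean waitUntilGone(Callable<Boolean> stillPresent, long timeoutMs)
      throws Exception {
    final long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
      Thread.sleep(100); // same polling period as the patch
      if (!stillPresent.call()) {
        Thread.sleep(100); // brief grace period for other readers, as above
        return true;
      }
    }
    return false;
  }
}

A caller mirrors the patch by raising an error when this returns false, for example waitUntilGone(() -> clusterState().getCollections().contains(name), 30000), where clusterState() is a hypothetical accessor.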
@@ -992,11 +1018,6 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread {
     DocCollection coll = clusterState.getCollection(collectionName);
 
-    if (coll == null) {
-      throw new SolrException(ErrorCode.BAD_REQUEST,
-          "Could not find collection:" + collectionName);
-    }
-
     for (Map.Entry<String,Slice> entry : coll.getSlicesMap().entrySet()) {
       Slice slice = entry.getValue();
       sliceCmd(clusterState, params, stateMatcher, slice);

solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java (View File)

@@ -25,6 +25,7 @@ import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrServer;
 import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.CoreAdminRequest.Create;
 import org.apache.solr.client.solrj.request.QueryRequest;
@@ -57,6 +58,7 @@ import org.junit.BeforeClass;
 import javax.management.MBeanServer;
 import javax.management.MBeanServerFactory;
 import javax.management.ObjectName;
+
 import java.io.File;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
@@ -147,12 +149,49 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBase {
     testCollectionsAPI();
     testErrorHandling();
     deletePartiallyCreatedCollection();
+    deleteCollectionRemovesStaleZkCollectionsNode();
 
     // last
     deleteCollectionWithDownNodes();
 
     if (DEBUG) {
       super.printLayout();
     }
   }
 
+  private void deleteCollectionRemovesStaleZkCollectionsNode() throws Exception {
+    // we can use this client because we just want base url
+    final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(0));
+    String collectionName = "out_of_sync_collection";
+    List<Integer> numShardsNumReplicaList = new ArrayList<Integer>();
+    numShardsNumReplicaList.add(2);
+    numShardsNumReplicaList.add(1);
+
+    cloudClient.getZkStateReader().getZkClient().makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName, true);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set("action", CollectionAction.DELETE.toString());
+    params.set("name", collectionName);
+    QueryRequest request = new QueryRequest(params);
+    request.setPath("/admin/collections");
+
+    try {
+      NamedList<Object> resp = createNewSolrServer("", baseUrl)
+          .request(request);
+      fail("Expected to fail, because collection is not in clusterstate");
+    } catch (RemoteSolrException e) {
+    }
+
+    checkForMissingCollection(collectionName);
+
+    assertFalse(cloudClient.getZkStateReader().getZkClient().exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName, true));
+  }
+
   private void deletePartiallyCreatedCollection() throws Exception {
     final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(0));
     String collectionName = "halfdeletedcollection";
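
The new test drives the Collections API exactly as an external client would: it plants a stray /collections/out_of_sync_collection znode, issues a DELETE that is expected to fail with RemoteSolrException because the collection is absent from the cluster state, and then asserts the stale znode was cleaned up anyway. Since the Collections API is plain HTTP under SolrJ, the same DELETE can be issued with nothing but java.net; a sketch, where baseUrl is a placeholder for a live node's URL such as http://127.0.0.1:8983/solr and name is assumed URL-safe:

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

class DeleteCollectionOverHttp {

  // Returns the HTTP status; a non-2xx code is how the "not in clusterstate"
  // failure surfaces here, where SolrJ would throw RemoteSolrException.
  static int deleteCollection(String baseUrl, String name) throws IOException {
    URL url = new URL(baseUrl + "/admin/collections?action=DELETE&name=" + name);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
      return conn.getResponseCode();
    } finally {
      conn.disconnect();
    }
  }
}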