SOLR-8983: Cleanup clusterstate in case of a failed CREATE collection call

This commit is contained in:
anshum 2016-04-15 15:14:30 -07:00
parent 9aefe193c9
commit 84a6ff697e
4 changed files with 127 additions and 8 deletions

View File

@ -129,6 +129,9 @@ Bug Fixes
* SOLR-9004: Fix "name" field type definition in films example. (Alexandre Rafalovitch via Varun Thacker)
* SOLR-8983: Cleanup clusterstate and replicas for a failed create collection request
(Varun Thacker, Anshum Gupta)
Optimizations
----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

View File

@ -1961,10 +1961,16 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
}
processResponses(results, shardHandler, false, null, async, requestMap, Collections.emptySet());
log.debug("Finished create command on all shards for collection: "
+ collectionName);
if(results.get("failure") != null && ((SimpleOrderedMap)results.get("failure")).size() > 0) {
// Let's cleanup as we hit an exception
// We shouldn't be passing 'results' here for the cleanup as the response would then contain 'success'
// element, which may be interpreted by the user as a positive ack
cleanupCollection(collectionName, new NamedList());
log.info("Cleaned up artifacts for failed create collection for [" + collectionName + "]");
} else {
log.debug("Finished create command on all shards for collection: "
+ collectionName);
}
} catch (SolrException ex) {
throw ex;
} catch (Exception ex) {
@ -1972,6 +1978,15 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
}
}
private void cleanupCollection(String collectionName, NamedList results) throws KeeperException, InterruptedException {
log.error("Cleaning up collection [" + collectionName + "]." );
Map<String, Object> props = makeMap(
Overseer.QUEUE_OPERATION, DELETE.toLower(),
NAME, collectionName);
deleteCollection(new ZkNodeProps(props), results);
}
private Map<Position, String> identifyNodes(ClusterState clusterState,
List<String> nodeList,
ZkNodeProps message,

View File

@ -26,8 +26,19 @@ import java.lang.management.ManagementFactory;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase.Slow;
@ -39,7 +50,6 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest.Create;
@ -375,6 +385,14 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
}
private NamedList<Object> makeRequest(String baseUrl, SolrRequest request, int socketTimeout)
throws SolrServerException, IOException {
try (SolrClient client = createNewSolrClient("", baseUrl)) {
((HttpSolrClient) client).setSoTimeout(socketTimeout);
return client.request(request);
}
}
private NamedList<Object> makeRequest(String baseUrl, SolrRequest request)
throws SolrServerException, IOException {
try (SolrClient client = createNewSolrClient("", baseUrl)) {
@ -525,8 +543,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
params.set(OverseerCollectionMessageHandler.CREATE_NODE_SET, nn1 + "," + nn2);
request = new QueryRequest(params);
request.setPath("/admin/collections");
gotExp = false;
NamedList<Object> resp = makeRequest(baseUrl, request);;
NamedList<Object> resp = makeRequest(baseUrl, request, 60000);
SimpleOrderedMap success = (SimpleOrderedMap) resp.get("success");
SimpleOrderedMap failure = (SimpleOrderedMap) resp.get("failure");

View File

@ -0,0 +1,84 @@
package org.apache.solr.cloud;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Properties;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.common.params.CoreAdminParams;
import org.junit.BeforeClass;
import org.junit.Test;
public class CreateCollectionCleanupTest extends SolrCloudTestCase {
protected static final String CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT = "<solr>\n" +
"\n" +
" <str name=\"shareSchema\">${shareSchema:false}</str>\n" +
" <str name=\"configSetBaseDir\">${configSetBaseDir:configsets}</str>\n" +
" <str name=\"coreRootDirectory\">${coreRootDirectory:.}</str>\n" +
"\n" +
" <shardHandlerFactory name=\"shardHandlerFactory\" class=\"HttpShardHandlerFactory\">\n" +
" <str name=\"urlScheme\">${urlScheme:}</str>\n" +
" <int name=\"socketTimeout\">${socketTimeout:90000}</int>\n" +
" <int name=\"connTimeout\">${connTimeout:15000}</int>\n" +
" </shardHandlerFactory>\n" +
"\n" +
" <solrcloud>\n" +
" <str name=\"host\">127.0.0.1</str>\n" +
" <int name=\"hostPort\">${hostPort:8983}</int>\n" +
" <str name=\"hostContext\">${hostContext:solr}</str>\n" +
" <int name=\"zkClientTimeout\">${solr.zkclienttimeout:30000}</int>\n" +
" <bool name=\"genericCoreNodeNames\">${genericCoreNodeNames:true}</bool>\n" +
" <int name=\"leaderVoteWait\">10000</int>\n" +
" <int name=\"distribUpdateConnTimeout\">${distribUpdateConnTimeout:45000}</int>\n" +
" <int name=\"distribUpdateSoTimeout\">${distribUpdateSoTimeout:340000}</int>\n" +
" <int name=\"createCollectionWaitTimeTillActive\">${createCollectionWaitTimeTillActive:10}</int>\n" +
" </solrcloud>\n" +
" \n" +
"</solr>\n";
@BeforeClass
public static void createCluster() throws Exception {
configureCluster(1)
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
.configure();
}
@Test
public void testCreateCollectionCleanup() throws Exception {
final CloudSolrClient cloudClient = cluster.getSolrClient();
// Create a collection that would fail
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("foo","conf1",1,1);
Properties properties = new Properties();
properties.put(CoreAdminParams.DATA_DIR, "/some_invalid_dir/foo");
create.setProperties(properties);
CollectionAdminResponse rsp = create.process(cloudClient);
assertFalse(rsp.isSuccess());
// Confirm using LIST that the collection does not exist
CollectionAdminRequest.List list = CollectionAdminRequest.listCollections();
rsp = list.process(cloudClient);
assertFalse(((ArrayList) rsp.getResponse().get("collections")).contains("foo"));
}
}