diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5689312943c..63c407c263e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -132,6 +132,8 @@ Bug Fixes * SOLR-4005: If CoreContainer fails to register a created core, it should close it. (Mark Miller) +* SOLR-4009: OverseerCollectionProcessor is not resiliant to many error conditions + and can stop running on errors. (milesli, Mark Miller) Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index 00d1fd06f68..c8033d4446d 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -80,7 +80,7 @@ public class OverseerCollectionProcessor implements Runnable { @Override public void run() { - log.info("Process current queue of collection creations"); + log.info("Process current queue of collection messages"); while (amILeader() && !isClosed) { try { byte[] head = workQueue.peek(true); @@ -88,13 +88,20 @@ public class OverseerCollectionProcessor implements Runnable { //if (head != null) { // should not happen since we block above final ZkNodeProps message = ZkNodeProps.load(head); final String operation = message.getStr(QUEUE_OPERATION); - + try { boolean success = processMessage(message, operation); if (!success) { // TODO: what to do on failure / partial failure // if we fail, do we clean up then ? - SolrException.log(log, "Collection creation of " + message.getStr("name") + " failed"); + SolrException.log(log, + "Collection " + operation + " of " + message.getStr("name") + + " failed"); } + } catch(Throwable t) { + SolrException.log(log, + "Collection " + operation + " of " + message.getStr("name") + + " failed", t); + } //} workQueue.remove(); } catch (KeeperException e) { diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index 62403800cc3..6fa53f5d094 100644 --- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -714,6 +714,40 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { //collectionNameList.remove(collectionName); + // remove an unknown collection + params = new ModifiableSolrParams(); + params.set("action", CollectionAction.DELETE.toString()); + params.set("name", "unknown_collection"); + request = new QueryRequest(params); + request.setPath("/admin/collections"); + + createNewSolrServer("", baseUrl).request(request); + + // create another collection should still work + params = new ModifiableSolrParams(); + params.set("action", CollectionAction.CREATE.toString()); + + params.set("numShards", 1); + params.set(OverseerCollectionProcessor.REPLICATION_FACTOR, 1); + collectionName = "acollectionafterbaddelete"; + + params.set("name", collectionName); + request = new QueryRequest(params); + request.setPath("/admin/collections"); + createNewSolrServer("", baseUrl).request(request); + + checkForCollection(collectionName, 1); + + url = getUrlFromZk(collectionName); + + collectionClient = new HttpSolrServer(url); + + // poll for a second - it can take a moment before we are ready to serve + waitForNon403or404or503(collectionClient); + + for (int j = 0; j < cnt; j++) { + waitForRecoveriesToFinish(collectionName, zkStateReader, false); + } }