SOLR-4009: OverseerCollectionProcessor is not resiliant to many error conditions and can stop running on errors.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1403480 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-10-29 20:25:22 +00:00
parent 9370f3584f
commit 6c5fb54052
3 changed files with 46 additions and 3 deletions

View File

@ -132,6 +132,8 @@ Bug Fixes
* SOLR-4005: If CoreContainer fails to register a created core, it should close it.
(Mark Miller)
* SOLR-4009: OverseerCollectionProcessor is not resiliant to many error conditions
and can stop running on errors. (milesli, Mark Miller)
Other Changes
----------------------

View File

@ -80,7 +80,7 @@ public class OverseerCollectionProcessor implements Runnable {
@Override
public void run() {
log.info("Process current queue of collection creations");
log.info("Process current queue of collection messages");
while (amILeader() && !isClosed) {
try {
byte[] head = workQueue.peek(true);
@ -88,13 +88,20 @@ public class OverseerCollectionProcessor implements Runnable {
//if (head != null) { // should not happen since we block above
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.getStr(QUEUE_OPERATION);
try {
boolean success = processMessage(message, operation);
if (!success) {
// TODO: what to do on failure / partial failure
// if we fail, do we clean up then ?
SolrException.log(log, "Collection creation of " + message.getStr("name") + " failed");
SolrException.log(log,
"Collection " + operation + " of " + message.getStr("name")
+ " failed");
}
} catch(Throwable t) {
SolrException.log(log,
"Collection " + operation + " of " + message.getStr("name")
+ " failed", t);
}
//}
workQueue.remove();
} catch (KeeperException e) {

View File

@ -714,6 +714,40 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
//collectionNameList.remove(collectionName);
// remove an unknown collection
params = new ModifiableSolrParams();
params.set("action", CollectionAction.DELETE.toString());
params.set("name", "unknown_collection");
request = new QueryRequest(params);
request.setPath("/admin/collections");
createNewSolrServer("", baseUrl).request(request);
// create another collection should still work
params = new ModifiableSolrParams();
params.set("action", CollectionAction.CREATE.toString());
params.set("numShards", 1);
params.set(OverseerCollectionProcessor.REPLICATION_FACTOR, 1);
collectionName = "acollectionafterbaddelete";
params.set("name", collectionName);
request = new QueryRequest(params);
request.setPath("/admin/collections");
createNewSolrServer("", baseUrl).request(request);
checkForCollection(collectionName, 1);
url = getUrlFromZk(collectionName);
collectionClient = new HttpSolrServer(url);
// poll for a second - it can take a moment before we are ready to serve
waitForNon403or404or503(collectionClient);
for (int j = 0; j < cnt; j++) {
waitForRecoveriesToFinish(collectionName, zkStateReader, false);
}
}