SOLR-3620: attempt to address shutdown deadlock - once the CoreContainer starts shutting down, do not start any new recoveries - there was a small window this could happen in previously

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1361310 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-07-13 18:05:09 +00:00
parent 732ad20130
commit 0e32691abc
3 changed files with 31 additions and 24 deletions

View File

@ -565,6 +565,8 @@ public class CoreContainer
*/
public void shutdown() {
log.info("Shutting down CoreContainer instance="+System.identityHashCode(this));
isShutDown = true;
if (isZooKeeperAware()) {
cancelCoreRecoveries();
}
@ -589,7 +591,6 @@ public class CoreContainer
if (shardHandlerFactory != null) {
shardHandlerFactory.close();
}
isShutDown = true;
}
}
}

View File

@ -81,11 +81,10 @@ public final class DefaultSolrCoreState extends SolrCoreState {
} catch (Throwable t) {
log.error("Error during shutdown of directory factory.", t);
}
try {
cancelRecovery();
} catch (Throwable t) {
log.error("Error cancelling recovery", t);
}
// TODO: we cannot cancel recovery here if its a CoreContainer shutdown
// it can cause deadlock - but perhaps we want to if we are stopping early
// and CoreContainer is not being shutdown?
closed = true;
}
@ -125,6 +124,11 @@ public final class DefaultSolrCoreState extends SolrCoreState {
return;
}
if (cc.isShutDown()) {
log.warn("Skipping recovery because Solr is shutdown");
return;
}
cancelRecovery();
synchronized (recoveryLock) {
while (recoveryRunning) {

View File

@ -137,7 +137,9 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
// setLoggingLevel(null);
del("*:*");
CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress());
server.setDefaultCollection(DEFAULT_COLLECTION);
solrj = server;
indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
@ -290,21 +292,21 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.RESULTS);
query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.QUERY);
// TODO: This test currently fails because debug info is obtained only
// on shards with matches.
// query("q","matchesnothing","fl","*,score", "debugQuery", "true");
// would be better if these where all separate tests - but much, much
// slower
doOptimisticLockingAndUpdating();
testMultipleCollections();
testANewCollectionInOneInstance();
testSearchByCollectionName();
testANewCollectionInOneInstanceWithManualShardAssignement();
testNumberOfCommitsWithCommitAfterAdd();
testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
// // TODO: This test currently fails because debug info is obtained only
// // on shards with matches.
// // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
//
// // would be better if these where all separate tests - but much, much
// // slower
// doOptimisticLockingAndUpdating();
// testMultipleCollections();
// testANewCollectionInOneInstance();
// testSearchByCollectionName();
// testANewCollectionInOneInstanceWithManualShardAssignement();
// testNumberOfCommitsWithCommitAfterAdd();
//
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
testCollectionsAPI();
@ -323,7 +325,7 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
// create new collections rapid fire
Map<String,List<Integer>> collectionInfos = new HashMap<String,List<Integer>>();
int cnt = atLeast(3);
int cnt = atLeast(9);
for (int i = 0; i < cnt; i++) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("action", CollectionAction.CREATE.toString());
@ -528,7 +530,7 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
private void checkForCollection(String collectionName, int expectedSlices)
throws Exception {
// check for an expectedSlices new collection - we poll the state
long timeoutAt = System.currentTimeMillis() + 60000;
long timeoutAt = System.currentTimeMillis() + 120000;
boolean found = false;
boolean sliceMatch = false;
while (System.currentTimeMillis() < timeoutAt) {