cancelRecovery wasn't being called; check for close between small sleeps; set closed even if we get an exception talking to ZK

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1341899 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2012-05-23 15:31:46 +00:00
parent c24efa0a9b
commit d8b9a75631
3 changed files with 23 additions and 19 deletions

View File

@ -92,6 +92,7 @@ public class RecoveryStrategy extends Thread implements SafeStopThread {
// Make sure any threads stop retrying: raise the close flag first, then
// log a warning naming the core and ZK node whose recovery is being stopped.
public void close() {
close = true;
log.warn("Stopping recovery for core " + coreName + " zkNodeName=" + coreZkNodeName);
}
@ -99,9 +100,12 @@ public class RecoveryStrategy extends Thread implements SafeStopThread {
final ZkController zkController, final String baseUrl,
final String shardZkNodeName, final CoreDescriptor cd) {
SolrException.log(log, "Recovery failed - I give up.");
try {
zkController.publishAsRecoveryFailed(baseUrl, cd,
shardZkNodeName, core.getName());
close = true;
} finally {
close();
}
}
private void replicate(String nodeName, SolrCore core, ZkNodeProps leaderprops, String baseUrl)
@ -393,11 +397,15 @@ public class RecoveryStrategy extends Thread implements SafeStopThread {
}
} catch (Exception e) {
SolrException.log(log, "", e);
log.error("", e);
}
try {
Thread.sleep(Math.min(START_TIMEOUT * retries, 60000));
// if (!isClosed()) Thread.sleep(Math.min(START_TIMEOUT * retries, 60000));
for (int i = 0; i<Math.min(retries, 600); i++) {
if (isClosed()) break; // check if someone closed us
Thread.sleep(START_TIMEOUT);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.warn("Recovery was interrupted", e);

View File

@ -582,23 +582,20 @@ public class CoreContainer
}
private void cancelCoreRecoveries() {
ArrayList<SolrCoreState> coreStates = null;
ArrayList<SolrCoreState> coreStates = new ArrayList<SolrCoreState>();
synchronized (cores) {
for (SolrCore core : cores.values()) {
try {
coreStates = new ArrayList<SolrCoreState>(cores.size());
// make sure we wait for any recoveries to stop
coreStates.add(core.getUpdateHandler().getSolrCoreState());
} catch (Throwable t) {
SolrException.log(log, "Error canceling recovery for core", t);
}
}
}
// we must cancel without holding the cores sync
if (coreStates != null) {
// make sure we wait for any recoveries to stop
for (SolrCoreState coreState : coreStates) {
try {
coreState.cancelRecovery();
} catch (Throwable t) {
SolrException.log(log, "Error canceling recovery for core", t);
}
}
}

View File

@ -153,7 +153,6 @@ public final class DefaultSolrCoreState extends SolrCoreState {
synchronized (recoveryLock) {
if (recoveryStrat != null) {
recoveryStrat.close();
try {
recoveryStrat.join();
} catch (InterruptedException e) {