SOLR-11278: Fix race-condition in CDCR submitting a bootstrap call and checking it's status

This commit is contained in:
Varun Thacker 2017-09-19 09:30:07 -07:00
parent cec5b418ea
commit 8e12f20113
2 changed files with 6 additions and 3 deletions

View File

@ -312,7 +312,9 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
timeOut = new TimeOut(BOOTSTRAP_TIMEOUT_SECONDS, TimeUnit.SECONDS); // reset the timer
retries++;
}
} else if (status == BootstrapStatus.NOTFOUND) {
} else if (status == BootstrapStatus.NOTFOUND || status == BootstrapStatus.CANCELLED) {
log.info("CDCR bootstrap " + (status == BootstrapStatus.NOTFOUND ? "not found" : "cancelled") + "in {} seconds",
BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
// the leader of the target shard may have changed and therefore there is no record of the
// bootstrap process so we must retry the operation
while (!closed && sendBootstrapCommand() != BootstrapStatus.SUBMITTED) {
@ -320,7 +322,9 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
}
retries = 1;
timeOut = new TimeOut(6L * 3600L * 3600L, TimeUnit.SECONDS); // reset the timer
} else if (status == BootstrapStatus.UNKNOWN) {
} else if (status == BootstrapStatus.UNKNOWN || status == BootstrapStatus.SUBMITTED) {
log.info("CDCR bootstrap is " + (status == BootstrapStatus.UNKNOWN ? "unknown" : "submitted"),
BOOTSTRAP_TIMEOUT_SECONDS - timeOut.timeLeft(TimeUnit.SECONDS));
// we were not able to query the status on the remote end
// so just sleep for a bit and try again
Thread.sleep(BOOTSTRAP_RETRY_DELAY_MS);

View File

@ -239,7 +239,6 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
}
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11278")
public void testBootstrapWithContinousIndexingOnSourceCluster() throws Exception {
// start the target first so that we know its zkhost
MiniSolrCloudCluster target = new MiniSolrCloudCluster(1, createTempDir("cdcr-target"), buildJettyConfig("/solr"));