SOLR-10126: Leader may trigger LIR on replicas, cause the test failure

This commit is contained in:
Cao Manh Dat 2017-08-10 17:36:21 +07:00
parent 0320f20e16
commit 5cdf89d272
1 changed files with 23 additions and 3 deletions

View File

@ -48,6 +48,7 @@ import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.util.TimeOut;
import org.junit.Test;
@ -77,6 +78,7 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
if (!success) {
printLayoutOnTearDown = true;
}
System.clearProperty("distribUpdateSoTimeout");
System.clearProperty("solr.directoryFactory");
System.clearProperty("solr.ulog.numRecordsToKeep");
System.clearProperty("tests.zk.violationReportAction");
@ -95,6 +97,8 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
@Override
public void distribSetUp() throws Exception {
// set socket timeout small, so replica won't be put into LIR state when they restart
System.setProperty("distribUpdateSoTimeout", "3000");
// tlog gets deleted after node restarts if we use CachingDirectoryFactory.
// make sure that tlog stays intact after we restart a node
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
@ -204,14 +208,18 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
class IndexInBackGround extends Thread {
private int numDocs;
private CloudJettyRunner runner;
public IndexInBackGround(int numDocs) {
public IndexInBackGround(int numDocs, CloudJettyRunner nodeToBringUp) {
super(getClassName());
this.numDocs = numDocs;
this.runner = nodeToBringUp;
}
public void run() {
try {
// If we don't wait for cores get loaded, the leader may put this replica into LIR state
waitForCoreLoading();
for (int i = 0; i < numDocs; i++) {
indexDoc(id, docId, i1, 50, tlong, 50, t1, "document number " + docId);
docId++;
@ -223,6 +231,17 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
//Throwing an error here will kill the thread
}
}
private void waitForCoreLoading() throws InterruptedException {
while (true) {
if (runner.jetty.getCoreContainer() != null) {
CoreContainer cc = runner.jetty.getCoreContainer();
cc.waitForLoadingCoresToFinish(20000);
break;
}
Thread.sleep(100);
}
}
}
@ -278,8 +297,9 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
throws Exception {
// disable fingerprint check if needed
System.setProperty("solr.disableFingerprint", String.valueOf(disableFingerprint));
IndexInBackGround iib = new IndexInBackGround(50);
// we wait a little bit, so socket between leader -> replica will be timeout
Thread.sleep(3000);
IndexInBackGround iib = new IndexInBackGround(50, nodeToBringUp);
iib.start();
// bring back dead node and ensure it recovers