mirror of https://github.com/apache/lucene.git
SOLR-10126: Leader may trigger LIR on replicas, cause the test failure
This commit is contained in:
parent
0320f20e16
commit
5cdf89d272
|
@ -48,6 +48,7 @@ import org.apache.solr.common.cloud.Replica;
|
|||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.metrics.SolrMetricManager;
|
||||
import org.apache.solr.util.TimeOut;
|
||||
import org.junit.Test;
|
||||
|
@ -77,6 +78,7 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
|
|||
if (!success) {
|
||||
printLayoutOnTearDown = true;
|
||||
}
|
||||
System.clearProperty("distribUpdateSoTimeout");
|
||||
System.clearProperty("solr.directoryFactory");
|
||||
System.clearProperty("solr.ulog.numRecordsToKeep");
|
||||
System.clearProperty("tests.zk.violationReportAction");
|
||||
|
@ -95,6 +97,8 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
@Override
|
||||
public void distribSetUp() throws Exception {
|
||||
// set socket timeout small, so replica won't be put into LIR state when they restart
|
||||
System.setProperty("distribUpdateSoTimeout", "3000");
|
||||
// tlog gets deleted after node restarts if we use CachingDirectoryFactory.
|
||||
// make sure that tlog stays intact after we restart a node
|
||||
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
|
||||
|
@ -204,14 +208,18 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
class IndexInBackGround extends Thread {
|
||||
private int numDocs;
|
||||
private CloudJettyRunner runner;
|
||||
|
||||
public IndexInBackGround(int numDocs) {
|
||||
public IndexInBackGround(int numDocs, CloudJettyRunner nodeToBringUp) {
|
||||
super(getClassName());
|
||||
this.numDocs = numDocs;
|
||||
this.runner = nodeToBringUp;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
// If we don't wait for cores get loaded, the leader may put this replica into LIR state
|
||||
waitForCoreLoading();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
indexDoc(id, docId, i1, 50, tlong, 50, t1, "document number " + docId);
|
||||
docId++;
|
||||
|
@ -223,6 +231,17 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
|
|||
//Throwing an error here will kill the thread
|
||||
}
|
||||
}
|
||||
|
||||
private void waitForCoreLoading() throws InterruptedException {
|
||||
while (true) {
|
||||
if (runner.jetty.getCoreContainer() != null) {
|
||||
CoreContainer cc = runner.jetty.getCoreContainer();
|
||||
cc.waitForLoadingCoresToFinish(20000);
|
||||
break;
|
||||
}
|
||||
Thread.sleep(100);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -278,8 +297,9 @@ public class PeerSyncReplicationTest extends AbstractFullDistribZkTestBase {
|
|||
throws Exception {
|
||||
// disable fingerprint check if needed
|
||||
System.setProperty("solr.disableFingerprint", String.valueOf(disableFingerprint));
|
||||
|
||||
IndexInBackGround iib = new IndexInBackGround(50);
|
||||
// we wait a little bit, so socket between leader -> replica will be timeout
|
||||
Thread.sleep(3000);
|
||||
IndexInBackGround iib = new IndexInBackGround(50, nodeToBringUp);
|
||||
iib.start();
|
||||
|
||||
// bring back dead node and ensure it recovers
|
||||
|
|
Loading…
Reference in New Issue