SOLR-9036: Solr slave is doing full replication (entire index) of index after master restart

This commit is contained in:
Shalin Shekhar Mangar 2016-05-05 21:00:20 +05:30
parent 18d933ee65
commit 51b131950d
3 changed files with 84 additions and 1 deletions

View File

@ -184,6 +184,9 @@ Bug Fixes
* SOLR-9030: The 'downnode' overseer command can trip asserts in ZkStateWriter. * SOLR-9030: The 'downnode' overseer command can trip asserts in ZkStateWriter.
(Scott Blum, Mark Miller, shalin) (Scott Blum, Mark Miller, shalin)
* SOLR-9036: Solr slave is doing full replication (entire index) of index after master restart.
(Lior Sapir, Mark Miller, shalin)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation. * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation.

View File

@ -565,7 +565,10 @@ public class IndexFetcher {
} }
} }
if (core.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
// we only track replication success in SolrCloud mode
core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall); core.getUpdateHandler().getSolrCoreState().setLastReplicateIndexSuccess(successfulInstall);
}
filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = tlogFilesToDownload = tlogFilesDownloaded = null; filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = tlogFilesToDownload = tlogFilesDownloaded = null;
markReplicationStop(); markReplicationStop();

View File

@ -58,6 +58,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -586,6 +587,82 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
assertEquals(nDocs+1, numFound(rQuery(nDocs+1, "*:*", slaveClient))); assertEquals(nDocs+1, numFound(rQuery(nDocs+1, "*:*", slaveClient)));
} }
/**
 * Asserts that if the master is down for longer than the slave's poll interval,
 * the slave does not re-fetch the whole index from the master once it is back,
 * provided the index has not changed. See SOLR-9036.
 *
 * @throws Exception if configuring, restarting, indexing into or querying the
 *                   master or slave fails
 */
@Test
public void doTestIndexFetchOnMasterRestart() throws Exception {
  useFactory(null);
  try {
    clearIndexWithReplication();
    // change solrconfig having 'replicateAfter startup' option on master
    master.copyConfigFile(CONF_DIR + "solrconfig-master2.xml",
        "solrconfig.xml");

    // restart the master so that the new configuration is used
    masterJetty.stop();
    masterJetty.start();

    nDocs--;
    for (int i = 0; i < nDocs; i++) {
      index(masterClient, "id", i, "name", "name = " + i);
    }
    masterClient.commit();

    NamedList masterQueryRsp = rQuery(nDocs, "*:*", masterClient);
    SolrDocumentList masterQueryResult = (SolrDocumentList) masterQueryRsp.get("response");
    assertEquals(nDocs, numFound(masterQueryRsp));

    // get docs from slave and check if number is equal to master
    NamedList slaveQueryRsp = rQuery(nDocs, "*:*", slaveClient);
    SolrDocumentList slaveQueryResult = (SolrDocumentList) slaveQueryRsp.get("response");
    assertEquals(nDocs, numFound(slaveQueryRsp));

    // compare results
    String cmp = BaseDistributedSearchTestCase.compare(masterQueryResult, slaveQueryResult, 0, null);
    assertNull(cmp);

    // baseline before the outage: exactly one replication and no failures
    assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")));
    String timesFailed = getSlaveDetails("timesFailed");
    assertEquals(0, Integer.parseInt(timesFailed != null ? timesFailed : "0"));

    masterJetty.stop();
    // poll interval on slave is 1 second, so we just sleep for a few seconds
    // to let the slave poll while the master is down
    Thread.sleep(2000);

    masterJetty.start();
    // sleep for a few poll intervals again so the slave polls the restarted master
    Thread.sleep(2000);

    // get docs from slave and assert that they are still the same as before
    slaveQueryRsp = rQuery(nDocs, "*:*", slaveClient);
    assertEquals(nDocs, numFound(slaveQueryRsp));

    // a missing 'timesFailed' entry is treated as zero (same as the baseline check above)
    // so that it surfaces as an assertion failure instead of a NumberFormatException
    String timesFailedAfterRestart = getSlaveDetails("timesFailed");
    int failed = Integer.parseInt(timesFailedAfterRestart != null ? timesFailedAfterRestart : "0");
    assertTrue("slave should have recorded failed polls while master was down", failed > 0);

    // NOTE(review): presumably 'timesIndexReplicated' also counts the failed attempts,
    // so subtracting them must leave only the single successful fetch -- confirm
    assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")) - failed);
  } finally {
    resetFactory();
  }
}
/**
 * Queries the slave's {@code /replication} handler with {@code command=details}
 * and returns a single entry from the {@code details/slave} section of the response.
 *
 * @param keyName name of the entry to read from {@code details/slave},
 *                e.g. {@code timesIndexReplicated}
 * @return the {@code toString()} of the entry, or {@code null} if there is no such entry
 * @throws SolrServerException propagated from the query against the slave
 * @throws IOException propagated from the query against the slave
 */
@SuppressWarnings("unchecked") // the response is an untyped NamedList tree; nested sections are cast as-is
private String getSlaveDetails(String keyName) throws SolrServerException, IOException {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set(CommonParams.QT, "/replication");
  params.set("command", "details");
  QueryResponse response = slaveClient.query(params);

  // details/slave/<keyName>
  NamedList<Object> details = (NamedList<Object>) response.getResponse().get("details");
  NamedList<Object> slave = (NamedList<Object>) details.get("slave");
  Object o = slave.get(keyName);
  return o != null ? o.toString() : null;
}
@Test @Test
public void doTestIndexFetchWithMasterUrl() throws Exception { public void doTestIndexFetchWithMasterUrl() throws Exception {
//change solrconfig on slave //change solrconfig on slave