SOLR-11467: Added extra logging to debug jenkins CDCR failures

This commit is contained in:
Varun Thacker 2017-10-12 20:45:41 -07:00
parent 4bc7fd2bd5
commit 6fecf849b6
4 changed files with 54 additions and 2 deletions

View File

@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
@ -62,6 +63,9 @@ public class CdcrReplicator implements Runnable {
}
try {
// check what's there in target before sending further
printDocsFromTargetForAdditionalDebugging();
// create update request
UpdateRequest req = new UpdateRequest();
// Add the param to indicate the {@link CdcrUpdateProcessor} to keep the provided version number
@ -77,6 +81,10 @@ public class CdcrReplicator implements Runnable {
Object o = subReader.next();
if (o == null) break; // we have reached the end of the update logs, we should close the batch
//additional logging
List entry = (List) o;
log.info("cdcr: current tlog entry in replicator: " + entry);
if (isDelete(o)) {
/*
@ -140,6 +148,17 @@ public class CdcrReplicator implements Runnable {
state.resetConsecutiveErrors();
}
private void printDocsFromTargetForAdditionalDebugging() throws IOException, SolrServerException, CdcrReplicatorException {
try {
log.info("cdcr: docs in target: "+ state + " in replicator: "
+ state.getClient().query(new SolrQuery("*:*")).getResults());
} catch (Exception e) {
log.warn("cdcr: exception while querying to target: " + e);
}
}
private boolean isDelete(Object o) {
List entry = (List) o;
int operationAndFlags = (Integer) entry.get(0);

View File

@ -416,6 +416,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
long version = future.get();
//additional logging
log.info("cdcr: shardcheckpoint: versions on target: " + version);
if (version < checkpoint) { // we must take the lowest checkpoint from all the shards
checkpoint = version;
}
@ -629,16 +631,23 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
}
CountDownLatch latch = new CountDownLatch(1); // latch to make sure BOOTSTRAP_STATUS gives correct response
//additional logging
log.info("cdcr: bootstrap executing for collection: " + collectionName + " and shard: " + shard);
Runnable runnable = () -> {
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
boolean locked = recoveryLock.tryLock();
SolrCoreState coreState = core.getSolrCoreState();
try {
if (!locked) {
//additional logging
log.info("cdcr: couldn't acquire lock for bootstrap, issue CANCEL");
handleCancelBootstrap(req, rsp);
} else if (leaderStateManager.amILeader()) {
coreState.setCdcrBootstrapRunning(true);
latch.countDown(); // free the latch as current bootstrap is executing
//additional logging
log.info("cdcr: acquire lock for bootstrap, latch removed");
//running.set(true);
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
BootstrapCallable bootstrapCallable = new BootstrapCallable(masterUrl, core);
@ -665,6 +674,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
latch.countDown(); // free the latch as current bootstrap is executing
}
}
//additional logging
log.info("cdcr: bootstrap status after action: " + coreState.getCdcrBootstrapRunning());
};
try {

View File

@ -67,6 +67,9 @@ public class CdcrUpdateProcessor extends DistributedUpdateProcessor {
cmd.setFlags(cmd.getFlags() & ~UpdateCommand.PEER_SYNC);
}
//additional logging
log.info("cdcr: version received at target: " + cmd.getVersion());
return result;
}

View File

@ -269,13 +269,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
req.add(doc);
}
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
req.process(sourceSolrClient);
}
QueryResponse response = sourceSolrClient.query(new SolrQuery("*:*"));
assertEquals("", numDocs, response.getResults().getNumFound());
//additional logging
log.info("cdcr: numDocs after initial indexing in source: " + numDocs);
// setup the target cluster
target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
@ -284,8 +287,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
targetSolrClient.setDefaultCollection("cdcr-target");
Thread.sleep(1000);
//additional logging
log.info("cdcr: numDocs in target-1 : " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
cdcrStart(targetSolrClient);
cdcrStart(sourceSolrClient);
//additional logging
log.info("cdcr: cdcr is enabled on source and target both, bootstrap should be triggered");
log.info("cdcr: current numDocs in target-2: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
int c = 0;
for (int k = 0; k < docs; k++) {
UpdateRequest req = new UpdateRequest();
@ -296,13 +307,20 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
req.add(doc);
}
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
req.process(sourceSolrClient);
}
//additional logging
log.info("cdcr: current numDocs in target-3: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
response = sourceSolrClient.query(new SolrQuery("*:*"));
assertEquals("", numDocs, response.getResults().getNumFound());
//additional logging
log.info("cdcr: numDocs after second indexing in source: " + numDocs);
log.info("cdcr: current numDocs in target-4: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
response = getCdcrQueue(sourceSolrClient);
System.out.println("Cdcr queue response: " + response.getResponse());
long foundDocs = waitForTargetToSync(numDocs, targetSolrClient);
@ -323,6 +341,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
try {
targetSolrClient.commit();
response = targetSolrClient.query(new SolrQuery("*:*"));
log.info("cdcr: numDocs in waitForTargetToSync: " + response.getResults().getNumFound());
if (response.getResults().getNumFound() == numDocs) {
break;
}