SOLR-11467: Added extra logging to debug jenkins CDCR failures

This commit is contained in:
Varun Thacker 2017-10-12 20:45:41 -07:00
parent 4bc7fd2bd5
commit 6fecf849b6
4 changed files with 54 additions and 2 deletions

View File

@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.List; import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
@ -62,6 +63,9 @@ public class CdcrReplicator implements Runnable {
} }
try { try {
// check what's there in target before sending further
printDocsFromTargetForAdditionalDebugging();
// create update request // create update request
UpdateRequest req = new UpdateRequest(); UpdateRequest req = new UpdateRequest();
// Add the param to indicate the {@link CdcrUpdateProcessor} to keep the provided version number // Add the param to indicate the {@link CdcrUpdateProcessor} to keep the provided version number
@ -77,6 +81,10 @@ public class CdcrReplicator implements Runnable {
Object o = subReader.next(); Object o = subReader.next();
if (o == null) break; // we have reached the end of the update logs, we should close the batch if (o == null) break; // we have reached the end of the update logs, we should close the batch
//additional logging
List entry = (List) o;
log.info("cdcr: current tlog entry in replicator: " + entry);
if (isDelete(o)) { if (isDelete(o)) {
/* /*
@ -140,6 +148,17 @@ public class CdcrReplicator implements Runnable {
state.resetConsecutiveErrors(); state.resetConsecutiveErrors();
} }
private void printDocsFromTargetForAdditionalDebugging() throws IOException, SolrServerException, CdcrReplicatorException {
try {
log.info("cdcr: docs in target: "+ state + " in replicator: "
+ state.getClient().query(new SolrQuery("*:*")).getResults());
} catch (Exception e) {
log.warn("cdcr: exception while querying to target: " + e);
}
}
private boolean isDelete(Object o) { private boolean isDelete(Object o) {
List entry = (List) o; List entry = (List) o;
int operationAndFlags = (Integer) entry.get(0); int operationAndFlags = (Integer) entry.get(0);

View File

@ -416,6 +416,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
for (final Future<Long> future : parallelExecutor.invokeAll(callables)) { for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
long version = future.get(); long version = future.get();
//additional logging
log.info("cdcr: shardcheckpoint: versions on target: " + version);
if (version < checkpoint) { // we must take the lowest checkpoint from all the shards if (version < checkpoint) { // we must take the lowest checkpoint from all the shards
checkpoint = version; checkpoint = version;
} }
@ -629,16 +631,23 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
} }
CountDownLatch latch = new CountDownLatch(1); // latch to make sure BOOTSTRAP_STATUS gives correct response CountDownLatch latch = new CountDownLatch(1); // latch to make sure BOOTSTRAP_STATUS gives correct response
//additional logging
log.info("cdcr: bootstrap executing for collection: " + collectionName + " and shard: " + shard);
Runnable runnable = () -> { Runnable runnable = () -> {
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock(); Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
boolean locked = recoveryLock.tryLock(); boolean locked = recoveryLock.tryLock();
SolrCoreState coreState = core.getSolrCoreState(); SolrCoreState coreState = core.getSolrCoreState();
try { try {
if (!locked) { if (!locked) {
//additional logging
log.info("cdcr: couldn't acquire lock for bootstrap, issue CANCEL");
handleCancelBootstrap(req, rsp); handleCancelBootstrap(req, rsp);
} else if (leaderStateManager.amILeader()) { } else if (leaderStateManager.amILeader()) {
coreState.setCdcrBootstrapRunning(true); coreState.setCdcrBootstrapRunning(true);
latch.countDown(); // free the latch as current bootstrap is executing latch.countDown(); // free the latch as current bootstrap is executing
//additional logging
log.info("cdcr: acquire lock for bootstrap, latch removed");
//running.set(true); //running.set(true);
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL); String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
BootstrapCallable bootstrapCallable = new BootstrapCallable(masterUrl, core); BootstrapCallable bootstrapCallable = new BootstrapCallable(masterUrl, core);
@ -665,6 +674,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
latch.countDown(); // free the latch as current bootstrap is executing latch.countDown(); // free the latch as current bootstrap is executing
} }
} }
//additional logging
log.info("cdcr: bootstrap status after action: " + coreState.getCdcrBootstrapRunning());
}; };
try { try {

View File

@ -67,6 +67,9 @@ public class CdcrUpdateProcessor extends DistributedUpdateProcessor {
cmd.setFlags(cmd.getFlags() & ~UpdateCommand.PEER_SYNC); cmd.setFlags(cmd.getFlags() & ~UpdateCommand.PEER_SYNC);
} }
//additional logging
log.info("cdcr: version received at target: " + cmd.getVersion());
return result; return result;
} }

View File

@ -269,13 +269,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
req.add(doc); req.add(doc);
} }
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
req.process(sourceSolrClient); req.process(sourceSolrClient);
} }
QueryResponse response = sourceSolrClient.query(new SolrQuery("*:*")); QueryResponse response = sourceSolrClient.query(new SolrQuery("*:*"));
assertEquals("", numDocs, response.getResults().getNumFound()); assertEquals("", numDocs, response.getResults().getNumFound());
//additional logging
log.info("cdcr: numDocs after initial indexing in source: " + numDocs);
// setup the target cluster // setup the target cluster
target.uploadConfigSet(configset("cdcr-target"), "cdcr-target"); target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1) CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
@ -284,8 +287,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
targetSolrClient.setDefaultCollection("cdcr-target"); targetSolrClient.setDefaultCollection("cdcr-target");
Thread.sleep(1000); Thread.sleep(1000);
//additional logging
log.info("cdcr: numDocs in target-1 : " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
cdcrStart(targetSolrClient); cdcrStart(targetSolrClient);
cdcrStart(sourceSolrClient); cdcrStart(sourceSolrClient);
//additional logging
log.info("cdcr: cdcr is enabled on source and target both, bootstrap should be triggered");
log.info("cdcr: current numDocs in target-2: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
int c = 0; int c = 0;
for (int k = 0; k < docs; k++) { for (int k = 0; k < docs; k++) {
UpdateRequest req = new UpdateRequest(); UpdateRequest req = new UpdateRequest();
@ -296,13 +307,20 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
req.add(doc); req.add(doc);
} }
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs); log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
req.process(sourceSolrClient); req.process(sourceSolrClient);
} }
//additional logging
log.info("cdcr: current numDocs in target-3: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
response = sourceSolrClient.query(new SolrQuery("*:*")); response = sourceSolrClient.query(new SolrQuery("*:*"));
assertEquals("", numDocs, response.getResults().getNumFound()); assertEquals("", numDocs, response.getResults().getNumFound());
//additional logging
log.info("cdcr: numDocs after second indexing in source: " + numDocs);
log.info("cdcr: current numDocs in target-4: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
response = getCdcrQueue(sourceSolrClient); response = getCdcrQueue(sourceSolrClient);
System.out.println("Cdcr queue response: " + response.getResponse()); System.out.println("Cdcr queue response: " + response.getResponse());
long foundDocs = waitForTargetToSync(numDocs, targetSolrClient); long foundDocs = waitForTargetToSync(numDocs, targetSolrClient);
@ -323,6 +341,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
try { try {
targetSolrClient.commit(); targetSolrClient.commit();
response = targetSolrClient.query(new SolrQuery("*:*")); response = targetSolrClient.query(new SolrQuery("*:*"));
log.info("cdcr: numDocs in waitForTargetToSync: " + response.getResults().getNumFound());
if (response.getResults().getNumFound() == numDocs) { if (response.getResults().getNumFound() == numDocs) {
break; break;
} }