mirror of https://github.com/apache/lucene.git
SOLR-11467: Added extra logging to debug jenkins CDCR failures
This commit is contained in:
parent
4bc7fd2bd5
commit
6fecf849b6
|
@ -21,6 +21,7 @@ import java.lang.invoke.MethodHandles;
|
|||
import java.nio.charset.Charset;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
|
@ -62,6 +63,9 @@ public class CdcrReplicator implements Runnable {
|
|||
}
|
||||
|
||||
try {
|
||||
// check what's there in target before sending further
|
||||
printDocsFromTargetForAdditionalDebugging();
|
||||
|
||||
// create update request
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
// Add the param to indicate the {@link CdcrUpdateProcessor} to keep the provided version number
|
||||
|
@ -77,6 +81,10 @@ public class CdcrReplicator implements Runnable {
|
|||
Object o = subReader.next();
|
||||
if (o == null) break; // we have reached the end of the update logs, we should close the batch
|
||||
|
||||
//additional logging
|
||||
List entry = (List) o;
|
||||
log.info("cdcr: current tlog entry in replicator: " + entry);
|
||||
|
||||
if (isDelete(o)) {
|
||||
|
||||
/*
|
||||
|
@ -140,6 +148,17 @@ public class CdcrReplicator implements Runnable {
|
|||
state.resetConsecutiveErrors();
|
||||
}
|
||||
|
||||
|
||||
private void printDocsFromTargetForAdditionalDebugging() throws IOException, SolrServerException, CdcrReplicatorException {
|
||||
try {
|
||||
log.info("cdcr: docs in target: "+ state + " in replicator: "
|
||||
+ state.getClient().query(new SolrQuery("*:*")).getResults());
|
||||
} catch (Exception e) {
|
||||
log.warn("cdcr: exception while querying to target: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isDelete(Object o) {
|
||||
List entry = (List) o;
|
||||
int operationAndFlags = (Integer) entry.get(0);
|
||||
|
|
|
@ -416,6 +416,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
|
||||
for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
|
||||
long version = future.get();
|
||||
//additional logging
|
||||
log.info("cdcr: shardcheckpoint: versions on target: " + version);
|
||||
if (version < checkpoint) { // we must take the lowest checkpoint from all the shards
|
||||
checkpoint = version;
|
||||
}
|
||||
|
@ -629,16 +631,23 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
}
|
||||
CountDownLatch latch = new CountDownLatch(1); // latch to make sure BOOTSTRAP_STATUS gives correct response
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: bootstrap executing for collection: " + collectionName + " and shard: " + shard);
|
||||
|
||||
Runnable runnable = () -> {
|
||||
Lock recoveryLock = req.getCore().getSolrCoreState().getRecoveryLock();
|
||||
boolean locked = recoveryLock.tryLock();
|
||||
SolrCoreState coreState = core.getSolrCoreState();
|
||||
try {
|
||||
if (!locked) {
|
||||
//additional logging
|
||||
log.info("cdcr: couldn't acquire lock for bootstrap, issue CANCEL");
|
||||
handleCancelBootstrap(req, rsp);
|
||||
} else if (leaderStateManager.amILeader()) {
|
||||
coreState.setCdcrBootstrapRunning(true);
|
||||
latch.countDown(); // free the latch as current bootstrap is executing
|
||||
//additional logging
|
||||
log.info("cdcr: acquire lock for bootstrap, latch removed");
|
||||
//running.set(true);
|
||||
String masterUrl = req.getParams().get(ReplicationHandler.MASTER_URL);
|
||||
BootstrapCallable bootstrapCallable = new BootstrapCallable(masterUrl, core);
|
||||
|
@ -665,6 +674,8 @@ public class CdcrRequestHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
latch.countDown(); // free the latch as current bootstrap is executing
|
||||
}
|
||||
}
|
||||
//additional logging
|
||||
log.info("cdcr: bootstrap status after action: " + coreState.getCdcrBootstrapRunning());
|
||||
};
|
||||
|
||||
try {
|
||||
|
|
|
@ -67,6 +67,9 @@ public class CdcrUpdateProcessor extends DistributedUpdateProcessor {
|
|||
cmd.setFlags(cmd.getFlags() & ~UpdateCommand.PEER_SYNC);
|
||||
}
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: version received at target: " + cmd.getVersion());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -269,13 +269,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
|||
req.add(doc);
|
||||
}
|
||||
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
|
||||
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
|
||||
log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
|
||||
req.process(sourceSolrClient);
|
||||
}
|
||||
|
||||
QueryResponse response = sourceSolrClient.query(new SolrQuery("*:*"));
|
||||
assertEquals("", numDocs, response.getResults().getNumFound());
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: numDocs after initial indexing in source: " + numDocs);
|
||||
|
||||
// setup the target cluster
|
||||
target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
|
||||
CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
|
||||
|
@ -284,8 +287,16 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
|||
targetSolrClient.setDefaultCollection("cdcr-target");
|
||||
Thread.sleep(1000);
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: numDocs in target-1 : " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
cdcrStart(targetSolrClient);
|
||||
cdcrStart(sourceSolrClient);
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: cdcr is enabled on source and target both, bootstrap should be triggered");
|
||||
log.info("cdcr: current numDocs in target-2: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
int c = 0;
|
||||
for (int k = 0; k < docs; k++) {
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
|
@ -296,13 +307,20 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
|||
req.add(doc);
|
||||
}
|
||||
req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
|
||||
System.out.println("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
|
||||
log.info("Adding " + docs + " docs with commit=true, numDocs=" + numDocs);
|
||||
req.process(sourceSolrClient);
|
||||
}
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: current numDocs in target-3: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
response = sourceSolrClient.query(new SolrQuery("*:*"));
|
||||
assertEquals("", numDocs, response.getResults().getNumFound());
|
||||
|
||||
//additional logging
|
||||
log.info("cdcr: numDocs after second indexing in source: " + numDocs);
|
||||
log.info("cdcr: current numDocs in target-4: " + targetSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
response = getCdcrQueue(sourceSolrClient);
|
||||
System.out.println("Cdcr queue response: " + response.getResponse());
|
||||
long foundDocs = waitForTargetToSync(numDocs, targetSolrClient);
|
||||
|
@ -323,6 +341,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
|||
try {
|
||||
targetSolrClient.commit();
|
||||
response = targetSolrClient.query(new SolrQuery("*:*"));
|
||||
log.info("cdcr: numDocs in waitForTargetToSync: " + response.getResults().getNumFound());
|
||||
if (response.getResults().getNumFound() == numDocs) {
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue