mirror of https://github.com/apache/lucene.git
SOLR-13599: additional 'checkpoint' logging to try and help diagnose strange failures
This commit is contained in:
parent
f686167e71
commit
b4a602f6b2
|
@ -271,12 +271,12 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
}
|
||||
|
||||
protected void testRf3() throws Exception {
|
||||
int numShards = 1;
|
||||
int replicationFactor = 3;
|
||||
int maxShardsPerNode = 1;
|
||||
String testCollectionName = "repfacttest_c8n_1x3";
|
||||
String shardId = "shard1";
|
||||
int minRf = 2;
|
||||
final int numShards = 1;
|
||||
final int replicationFactor = 3;
|
||||
final int maxShardsPerNode = 1;
|
||||
final String testCollectionName = "repfacttest_c8n_1x3";
|
||||
final String shardId = "shard1";
|
||||
final int minRf = 2;
|
||||
|
||||
createCollectionWithRetry(testCollectionName, "conf1", numShards, replicationFactor, maxShardsPerNode);
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
@ -285,6 +285,7 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
ensureAllReplicasAreActive(testCollectionName, shardId, numShards, replicationFactor, 30);
|
||||
assertTrue("Expected 2 active replicas for "+testCollectionName, replicas.size() == 2);
|
||||
|
||||
log.info("Indexing docId=1");
|
||||
int rf = sendDoc(1, minRf);
|
||||
assertRf(3, "all replicas should be active", rf);
|
||||
|
||||
|
@ -292,9 +293,10 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
doDBIdWithRetry(3, 5, "deletes should have propagated to all 3 replicas", 1);
|
||||
doDBQWithRetry(3, 5, "deletes should have propagated to all 3 replicas", 1);
|
||||
|
||||
|
||||
log.info("Closing one proxy port");
|
||||
getProxyForReplica(replicas.get(0)).close();
|
||||
|
||||
log.info("Indexing docId=2");
|
||||
rf = sendDoc(2, minRf);
|
||||
assertRf(2, "one replica should be down", rf);
|
||||
|
||||
|
@ -303,8 +305,22 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
doDBIdWithRetry(2, 5, "deletes should have propagated to 2 replicas", 1);
|
||||
|
||||
|
||||
// SOLR-13599 sanity check if problem is related to sending a batch
|
||||
List<SolrInputDocument> batch = new ArrayList<SolrInputDocument>(10);
|
||||
for (int i=30; i < 45; i++) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField(id, String.valueOf(i));
|
||||
doc.addField("a_t", "hello" + i);
|
||||
batch.add(doc);
|
||||
}
|
||||
log.info("Indexing batch of documents (30-45)");
|
||||
int batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
|
||||
assertRf(2, "batch should have succeded, only one replica should be down", batchRf);
|
||||
|
||||
log.info("Closing second proxy port");
|
||||
getProxyForReplica(replicas.get(1)).close();
|
||||
|
||||
log.info("Indexing docId=3");
|
||||
rf = sendDoc(3, minRf);
|
||||
assertRf(1, "both replicas should be down", rf);
|
||||
|
||||
|
@ -312,6 +328,7 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
doDBIdWithRetry(1, 5, "deletes should have propagated to only 1 replica", 1);
|
||||
|
||||
// heal the partitions
|
||||
log.info("Re-opening closed proxy ports");
|
||||
getProxyForReplica(replicas.get(0)).reopen();
|
||||
getProxyForReplica(replicas.get(1)).reopen();
|
||||
|
||||
|
@ -319,14 +336,15 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
ensureAllReplicasAreActive(testCollectionName, shardId, numShards, replicationFactor, 30);
|
||||
|
||||
log.info("Indexing docId=4");
|
||||
rf = sendDoc(4, minRf);
|
||||
assertRf(3, "partitions to replicas have been healed", rf);
|
||||
assertRf(3, "all replicas have been healed", rf);
|
||||
|
||||
doDBQWithRetry(3, 5, "deletes should have propagated to all 3 replicas", 1);
|
||||
doDBIdWithRetry(3, 5, "deletes should have propagated to all 3 replicas", 1);
|
||||
doDBQWithRetry(3, 5, "delete should have propagated to all 3 replicas", 1);
|
||||
doDBIdWithRetry(3, 5, "delete should have propagated to all 3 replicas", 1);
|
||||
|
||||
// now send a batch
|
||||
List<SolrInputDocument> batch = new ArrayList<SolrInputDocument>(10);
|
||||
batch = new ArrayList<SolrInputDocument>(10);
|
||||
for (int i=5; i < 15; i++) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField(id, String.valueOf(i));
|
||||
|
@ -334,16 +352,24 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
batch.add(doc);
|
||||
}
|
||||
|
||||
int batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
|
||||
assertRf(3, "batch should have succeeded on all replicas", batchRf);
|
||||
log.info("Indexing batch of documents (5-14)");
|
||||
batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
|
||||
assertRf(3, "batch add should have succeeded on all replicas", batchRf);
|
||||
|
||||
doDBQWithRetry(3, 5, "deletes should have propagated to only 1 replica", 15);
|
||||
doDBIdWithRetry(3, 5, "deletes should have propagated to only 1 replica", 15);
|
||||
doDBQWithRetry(3, 5, "batch deletes should have propagated to all 3 replica", 15);
|
||||
doDBIdWithRetry(3, 5, "batch deletes should have propagated to all 3 replica", 15);
|
||||
|
||||
// add some chaos to the batch
|
||||
log.info("Closing one proxy port (again)");
|
||||
getProxyForReplica(replicas.get(0)).close();
|
||||
|
||||
// now send a batch
|
||||
// send a single doc (again)
|
||||
// SOLR-13599 sanity check if problem is related to "re-closing" a port on the proxy
|
||||
log.info("Indexing docId=5");
|
||||
rf = sendDoc(5, minRf);
|
||||
assertRf(2, "doc should have succeded, only one replica should be down", rf);
|
||||
|
||||
// now send a batch (again)
|
||||
batch = new ArrayList<SolrInputDocument>(10);
|
||||
for (int i=15; i < 30; i++) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
@ -351,14 +377,15 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
doc.addField("a_t", "hello" + i);
|
||||
batch.add(doc);
|
||||
}
|
||||
|
||||
log.info("Indexing batch of documents (15-29)");
|
||||
batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
|
||||
assertRf(2, "batch should have succeeded on 2 replicas (only one replica should be down)", batchRf);
|
||||
assertRf(2, "batch should have succeded, only one replica should be down", batchRf);
|
||||
|
||||
doDBQWithRetry(2, 5, "deletes should have propagated to only 1 replica", 15);
|
||||
doDBIdWithRetry(2, 5, "deletes should have propagated to only 1 replica", 15);
|
||||
|
||||
// close the 2nd replica, and send a 3rd batch with expected achieved rf=1
|
||||
log.info("Closing second proxy port (again)");
|
||||
getProxyForReplica(replicas.get(1)).close();
|
||||
|
||||
batch = new ArrayList<SolrInputDocument>(10);
|
||||
|
|
Loading…
Reference in New Issue