SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup

This commit is contained in:
Cao Manh Dat 2017-10-04 15:25:17 +07:00
parent fb19528f6c
commit 9905736898
4 changed files with 72 additions and 13 deletions

View File

@@ -174,6 +174,8 @@ Optimizations
* SOLR-10285: Skip LEADER messages when there are leader only shards (Cao Manh Dat, Joshua Humphries)
* SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup. (Cao Manh Dat)
Other Changes
----------------------

View File

@@ -1471,7 +1471,7 @@ public class ZkController {
return coreNodeName;
}
public void preRegister(CoreDescriptor cd) {
public void preRegister(CoreDescriptor cd, boolean publishState) {
String coreNodeName = getCoreNodeName(cd);
@@ -1487,7 +1487,10 @@ public class ZkController {
cloudDesc.setCoreNodeName(coreNodeName);
}
publish(cd, Replica.State.DOWN, false, true);
// publishState == false on startup
if (publishState || isPublishAsDownOnStartup(cloudDesc)) {
publish(cd, Replica.State.DOWN, false, true);
}
String collectionName = cd.getCloudDescriptor().getCollectionName();
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
log.debug(collection == null ?
@@ -1504,15 +1507,28 @@ public class ZkController {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
if (cd.getCloudDescriptor().getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getClusterState(), coreNodeName)) {
doGetShardIdAndNodeNameProcess(cd);
} else {
// still wait till we see us in local state
doGetShardIdAndNodeNameProcess(cd);
}
doGetShardIdAndNodeNameProcess(cd);
}
/**
 * On startup, the node has already published all of its replicas as DOWN
 * (during the previous shutdown), so when legacyCloud=false — meaning the
 * replica must already be present in ZK cluster state — we can skip
 * publishing the replica as DOWN again.
 *
 * @param cloudDesc descriptor of the core being pre-registered
 * @return true if the replica should still be published as DOWN on startup
 */
private boolean isPublishAsDownOnStartup(CloudDescriptor cloudDesc) {
  if (!Overseer.isLegacy(zkStateReader)) {
    Replica replica = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
        .getSlice(cloudDesc.getShardId())
        .getReplica(cloudDesc.getCoreNodeName());
    // getReplica returns null when the replica is not yet in cluster state
    // (e.g. a brand-new core); guard against an NPE and publish DOWN as
    // before in that case.
    if (replica != null && replica.getNodeName().equals(getNodeName())) {
      return false;
    }
  }
  return true;
}
private void checkStateInZk(CoreDescriptor cd) throws InterruptedException {
if (!Overseer.isLegacy(zkStateReader)) {
CloudDescriptor cloudDesc = cd.getCloudDescriptor();

View File

@@ -1018,7 +1018,7 @@ public class CoreContainer {
MDCLoggingContext.setCoreDescriptor(this, dcore);
SolrIdentifierValidator.validateCoreName(dcore.getName());
if (zkSys.getZkController() != null) {
zkSys.getZkController().preRegister(dcore);
zkSys.getZkController().preRegister(dcore, publishState);
}
ConfigSet coreConfig = coreConfigService.getConfig(dcore);

View File

@@ -27,7 +27,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.junit.BeforeClass;
public class TestShardsWithSingleReplica extends SolrCloudTestCase {
public class TestSkipOverseerOperations extends SolrCloudTestCase {
@BeforeClass
public static void setupCluster() throws Exception {
@@ -56,26 +56,67 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
.process(cluster.getSolrClient());
for (JettySolrRunner solrRunner : notOverseerNodes) {
cluster.stopJettySolrRunner(solrRunner);
solrRunner.stop();
}
waitForState("Expected empty liveNodes", collection,
waitForState("Expected single liveNode", collection,
(liveNodes, collectionState) -> liveNodes.size() == 1);
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
for (JettySolrRunner solrRunner : notOverseerNodes) {
cluster.startJettySolrRunner(solrRunner);
solrRunner.start();
}
waitForState("Expected 2x1 for collection: " + collection, collection,
clusterShape(2, 1));
CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
}
/**
 * Restarts every non-Overseer node and checks that the Overseer's "state"
 * operation count grows by exactly 6 (per the inline note: 2 recovering + 4
 * active transitions), i.e. no extra DOWN publications happen on startup.
 */
public void testSkipDownOperations() throws Exception {
  String overseerLeader = getOverseerLeader();
  // Every node except the current Overseer leader will be bounced.
  List<JettySolrRunner> restartedNodes = cluster.getJettySolrRunners()
      .stream()
      .filter(runner -> !runner.getNodeName().equals(overseerLeader))
      .collect(Collectors.toList());

  String collection = "collection2";
  String createNodeSet = restartedNodes
      .stream()
      .map(JettySolrRunner::getNodeName)
      .collect(Collectors.joining(","));
  CollectionAdminRequest
      .createCollection(collection, 2, 2)
      .setCreateNodeSet(createNodeSet)
      .setMaxShardsPerNode(2)
      .process(cluster.getSolrClient());

  for (JettySolrRunner runner : restartedNodes) {
    runner.stop();
  }
  waitForState("Expected single liveNode", collection,
      (liveNodes, collectionState) -> liveNodes.size() == 1);

  // Snapshot the Overseer's operation counters before the restart.
  CollectionAdminResponse statusBefore = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
  for (JettySolrRunner runner : restartedNodes) {
    runner.start();
  }
  waitForState("Expected 2x2 for collection: " + collection, collection,
      clusterShape(2, 2));
  CollectionAdminResponse statusAfter = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
  // 2 for recovering state, 4 for active state
  assertEquals(getNumStateOpeations(statusBefore) + 6, getNumStateOpeations(statusAfter));
  CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
}
/**
 * Extracts the Overseer's "leader" operation request count from an
 * OVERSEERSTATUS response ("overseer_operations" / "leader" / "requests").
 * (Name spelling kept for compatibility with existing callers.)
 */
private int getNumLeaderOpeations(CollectionAdminResponse resp) {
  Object requests = resp.getResponse().findRecursive("overseer_operations", "leader", "requests");
  return (int) requests;
}
/**
 * Extracts the Overseer's "state" operation request count from an
 * OVERSEERSTATUS response ("overseer_operations" / "state" / "requests").
 * (Name spelling kept for compatibility with existing callers.)
 */
private int getNumStateOpeations(CollectionAdminResponse resp) {
  Object requests = resp.getResponse().findRecursive("overseer_operations", "state", "requests");
  return (int) requests;
}
private String getOverseerLeader() throws IOException, SolrServerException {
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
return (String) resp.getResponse().get("leader");