SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup

This commit is contained in:
Cao Manh Dat 2017-10-04 15:25:17 +07:00
parent fb19528f6c
commit 9905736898
4 changed files with 72 additions and 13 deletions

View File

@ -174,6 +174,8 @@ Optimizations
* SOLR-10285: Skip LEADER messages when there are leader only shards (Cao Manh Dat, Joshua Humphries) * SOLR-10285: Skip LEADER messages when there are leader only shards (Cao Manh Dat, Joshua Humphries)
* SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup. (Cao Manh Dat)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -1471,7 +1471,7 @@ public class ZkController {
return coreNodeName; return coreNodeName;
} }
public void preRegister(CoreDescriptor cd) { public void preRegister(CoreDescriptor cd, boolean publishState) {
String coreNodeName = getCoreNodeName(cd); String coreNodeName = getCoreNodeName(cd);
@ -1487,7 +1487,10 @@ public class ZkController {
cloudDesc.setCoreNodeName(coreNodeName); cloudDesc.setCoreNodeName(coreNodeName);
} }
// publishState == false on startup
if (publishState || isPublishAsDownOnStartup(cloudDesc)) {
publish(cd, Replica.State.DOWN, false, true); publish(cd, Replica.State.DOWN, false, true);
}
String collectionName = cd.getCloudDescriptor().getCollectionName(); String collectionName = cd.getCloudDescriptor().getCollectionName();
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName); DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
log.debug(collection == null ? log.debug(collection == null ?
@ -1504,13 +1507,26 @@ public class ZkController {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} }
if (cd.getCloudDescriptor().getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getClusterState(), coreNodeName)) {
doGetShardIdAndNodeNameProcess(cd);
} else {
// still wait till we see us in local state
doGetShardIdAndNodeNameProcess(cd); doGetShardIdAndNodeNameProcess(cd);
} }
/**
* Decides whether a core still has to publish itself as DOWN while it is being pre-registered.
* <p>
* On startup, the node has already published all of its replicas as DOWN,
* so when legacyCloud=false (the replica must already be present in ZK)
* publishing the replica as DOWN a second time can be skipped.
*
* @param cloudDesc descriptor supplying the collection name, shard id and core node name used to look up the replica
* @return {@code false} when the cluster is not in legacy mode and the replica found in the cluster state
*         reports the same node name as {@code getNodeName()}; {@code true} in every other case
*/
private boolean isPublishAsDownOnStartup(CloudDescriptor cloudDesc) {
if (!Overseer.isLegacy(zkStateReader)) {
// NOTE(review): every step of this lookup chain is dereferenced without a null check.
// Presumably getSlice()/getReplica() can return null for an unknown shard id or core node name,
// which would surface here as a NullPointerException - confirm against callers.
Replica replica = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
.getSlice(cloudDesc.getShardId())
.getReplica(cloudDesc.getCoreNodeName());
// The replica is registered under this node's name, so the extra DOWN publish is skipped.
if (replica.getNodeName().equals(getNodeName())) {
return false;
}
}
// Legacy mode, or the replica is registered under a different node name: publish DOWN as before.
return true;
} }
private void checkStateInZk(CoreDescriptor cd) throws InterruptedException { private void checkStateInZk(CoreDescriptor cd) throws InterruptedException {

View File

@ -1018,7 +1018,7 @@ public class CoreContainer {
MDCLoggingContext.setCoreDescriptor(this, dcore); MDCLoggingContext.setCoreDescriptor(this, dcore);
SolrIdentifierValidator.validateCoreName(dcore.getName()); SolrIdentifierValidator.validateCoreName(dcore.getName());
if (zkSys.getZkController() != null) { if (zkSys.getZkController() != null) {
zkSys.getZkController().preRegister(dcore); zkSys.getZkController().preRegister(dcore, publishState);
} }
ConfigSet coreConfig = coreConfigService.getConfig(dcore); ConfigSet coreConfig = coreConfigService.getConfig(dcore);

View File

@ -27,7 +27,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.junit.BeforeClass; import org.junit.BeforeClass;
public class TestShardsWithSingleReplica extends SolrCloudTestCase { public class TestSkipOverseerOperations extends SolrCloudTestCase {
@BeforeClass @BeforeClass
public static void setupCluster() throws Exception { public static void setupCluster() throws Exception {
@ -56,26 +56,67 @@ public class TestShardsWithSingleReplica extends SolrCloudTestCase {
.process(cluster.getSolrClient()); .process(cluster.getSolrClient());
for (JettySolrRunner solrRunner : notOverseerNodes) { for (JettySolrRunner solrRunner : notOverseerNodes) {
cluster.stopJettySolrRunner(solrRunner); solrRunner.stop();
} }
waitForState("Expected empty liveNodes", collection, waitForState("Expected single liveNode", collection,
(liveNodes, collectionState) -> liveNodes.size() == 1); (liveNodes, collectionState) -> liveNodes.size() == 1);
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient()); CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
for (JettySolrRunner solrRunner : notOverseerNodes) { for (JettySolrRunner solrRunner : notOverseerNodes) {
cluster.startJettySolrRunner(solrRunner); solrRunner.start();
} }
waitForState("Expected 2x1 for collection: " + collection, collection, waitForState("Expected 2x1 for collection: " + collection, collection,
clusterShape(2, 1)); clusterShape(2, 1));
CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient()); CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2)); assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
}
/**
* Restarts every non-overseer node hosting a 2x2 collection and checks, through the overseer status
* counters, that the restart adds exactly six "state" requests.
*/
public void testSkipDownOperations() throws Exception {
String overseerLeader = getOverseerLeader();
// Every node except the current overseer leader; only these nodes host the collection and get restarted.
List<JettySolrRunner> notOverseerNodes = cluster.getJettySolrRunners()
.stream()
.filter(solrRunner -> !solrRunner.getNodeName().equals(overseerLeader))
.collect(Collectors.toList());
String collection = "collection2";
// Create the collection with arguments (2, 2), restricted to the non-overseer nodes.
CollectionAdminRequest
.createCollection(collection, 2, 2)
.setCreateNodeSet(notOverseerNodes
.stream()
.map(JettySolrRunner::getNodeName)
.collect(Collectors.joining(","))
)
.setMaxShardsPerNode(2)
.process(cluster.getSolrClient());
// Stop all nodes that host the collection.
for (JettySolrRunner solrRunner : notOverseerNodes) {
solrRunner.stop();
}
// Wait until exactly one live node remains.
waitForState("Expected single liveNode", collection,
(liveNodes, collectionState) -> liveNodes.size() == 1);
// Snapshot of the overseer counters taken before the nodes come back.
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
for (JettySolrRunner solrRunner : notOverseerNodes) {
solrRunner.start();
}
waitForState("Expected 2x2 for collection: " + collection, collection,
clusterShape(2, 2));
// Second snapshot, taken once the collection has reached the expected shape again.
CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
// 2 for recovering state, 4 for active state
assertEquals(getNumStateOpeations(resp) + 6, getNumStateOpeations(resp2));
// Clean up the collection created by this test.
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
} }
private int getNumLeaderOpeations(CollectionAdminResponse resp) { private int getNumLeaderOpeations(CollectionAdminResponse resp) {
return (int) resp.getResponse().findRecursive("overseer_operations", "leader", "requests"); return (int) resp.getResponse().findRecursive("overseer_operations", "leader", "requests");
} }
/**
 * Extracts the "requests" value recorded under overseer_operations / state
 * from an overseer status response.
 *
 * @param resp response of an overseer status request
 * @return the value found at that path, cast to {@code int}
 */
private int getNumStateOpeations(CollectionAdminResponse resp) {
  Object stateRequests = resp.getResponse().findRecursive("overseer_operations", "state", "requests");
  return (int) stateRequests;
}
private String getOverseerLeader() throws IOException, SolrServerException { private String getOverseerLeader() throws IOException, SolrServerException {
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient()); CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
return (String) resp.getResponse().get("leader"); return (String) resp.getResponse().get("leader");