mirror of https://github.com/apache/lucene.git
SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup
This commit is contained in:
parent
fb19528f6c
commit
9905736898
|
@ -174,6 +174,8 @@ Optimizations
|
|||
|
||||
* SOLR-10285: Skip LEADER messages when there are leader only shards (Cao Manh Dat, Joshua Humphries)
|
||||
|
||||
* SOLR-11424: When legacyCloud=false, cores should not publish themselves as DOWN on startup. (Cao Manh Dat)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -1471,7 +1471,7 @@ public class ZkController {
|
|||
return coreNodeName;
|
||||
}
|
||||
|
||||
public void preRegister(CoreDescriptor cd) {
|
||||
public void preRegister(CoreDescriptor cd, boolean publishState) {
|
||||
|
||||
String coreNodeName = getCoreNodeName(cd);
|
||||
|
||||
|
@ -1487,7 +1487,10 @@ public class ZkController {
|
|||
cloudDesc.setCoreNodeName(coreNodeName);
|
||||
}
|
||||
|
||||
publish(cd, Replica.State.DOWN, false, true);
|
||||
// publishState == false on startup
|
||||
if (publishState || isPublishAsDownOnStartup(cloudDesc)) {
|
||||
publish(cd, Replica.State.DOWN, false, true);
|
||||
}
|
||||
String collectionName = cd.getCloudDescriptor().getCollectionName();
|
||||
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
|
||||
log.debug(collection == null ?
|
||||
|
@ -1504,15 +1507,28 @@ public class ZkController {
|
|||
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
|
||||
}
|
||||
|
||||
if (cd.getCloudDescriptor().getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getClusterState(), coreNodeName)) {
|
||||
doGetShardIdAndNodeNameProcess(cd);
|
||||
} else {
|
||||
// still wait till we see us in local state
|
||||
doGetShardIdAndNodeNameProcess(cd);
|
||||
}
|
||||
doGetShardIdAndNodeNameProcess(cd);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* On startup, the node already published all of its replicas as DOWN,
|
||||
* so in case of legacyCloud=false ( the replica must already present on Zk )
|
||||
* we can skip publish the replica as down
|
||||
* @return Should publish the replica as down on startup
|
||||
*/
|
||||
private boolean isPublishAsDownOnStartup(CloudDescriptor cloudDesc) {
|
||||
if (!Overseer.isLegacy(zkStateReader)) {
|
||||
Replica replica = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
|
||||
.getSlice(cloudDesc.getShardId())
|
||||
.getReplica(cloudDesc.getCoreNodeName());
|
||||
if (replica.getNodeName().equals(getNodeName())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void checkStateInZk(CoreDescriptor cd) throws InterruptedException {
|
||||
if (!Overseer.isLegacy(zkStateReader)) {
|
||||
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
|
||||
|
|
|
@ -1018,7 +1018,7 @@ public class CoreContainer {
|
|||
MDCLoggingContext.setCoreDescriptor(this, dcore);
|
||||
SolrIdentifierValidator.validateCoreName(dcore.getName());
|
||||
if (zkSys.getZkController() != null) {
|
||||
zkSys.getZkController().preRegister(dcore);
|
||||
zkSys.getZkController().preRegister(dcore, publishState);
|
||||
}
|
||||
|
||||
ConfigSet coreConfig = coreConfigService.getConfig(dcore);
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
|||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
public class TestShardsWithSingleReplica extends SolrCloudTestCase {
|
||||
public class TestSkipOverseerOperations extends SolrCloudTestCase {
|
||||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
|
@ -56,26 +56,67 @@ public class TestShardsWithSingleReplica extends SolrCloudTestCase {
|
|||
.process(cluster.getSolrClient());
|
||||
|
||||
for (JettySolrRunner solrRunner : notOverseerNodes) {
|
||||
cluster.stopJettySolrRunner(solrRunner);
|
||||
solrRunner.stop();
|
||||
}
|
||||
waitForState("Expected empty liveNodes", collection,
|
||||
waitForState("Expected single liveNode", collection,
|
||||
(liveNodes, collectionState) -> liveNodes.size() == 1);
|
||||
|
||||
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
|
||||
for (JettySolrRunner solrRunner : notOverseerNodes) {
|
||||
cluster.startJettySolrRunner(solrRunner);
|
||||
solrRunner.start();
|
||||
}
|
||||
|
||||
waitForState("Expected 2x1 for collection: " + collection, collection,
|
||||
clusterShape(2, 1));
|
||||
CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
|
||||
assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));
|
||||
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
|
||||
}
|
||||
|
||||
public void testSkipDownOperations() throws Exception {
|
||||
String overseerLeader = getOverseerLeader();
|
||||
List<JettySolrRunner> notOverseerNodes = cluster.getJettySolrRunners()
|
||||
.stream()
|
||||
.filter(solrRunner -> !solrRunner.getNodeName().equals(overseerLeader))
|
||||
.collect(Collectors.toList());
|
||||
String collection = "collection2";
|
||||
CollectionAdminRequest
|
||||
.createCollection(collection, 2, 2)
|
||||
.setCreateNodeSet(notOverseerNodes
|
||||
.stream()
|
||||
.map(JettySolrRunner::getNodeName)
|
||||
.collect(Collectors.joining(","))
|
||||
)
|
||||
.setMaxShardsPerNode(2)
|
||||
.process(cluster.getSolrClient());
|
||||
|
||||
for (JettySolrRunner solrRunner : notOverseerNodes) {
|
||||
solrRunner.stop();
|
||||
}
|
||||
waitForState("Expected single liveNode", collection,
|
||||
(liveNodes, collectionState) -> liveNodes.size() == 1);
|
||||
|
||||
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
|
||||
for (JettySolrRunner solrRunner : notOverseerNodes) {
|
||||
solrRunner.start();
|
||||
}
|
||||
|
||||
waitForState("Expected 2x2 for collection: " + collection, collection,
|
||||
clusterShape(2, 2));
|
||||
CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
|
||||
// 2 for recovering state, 4 for active state
|
||||
assertEquals(getNumStateOpeations(resp) + 6, getNumStateOpeations(resp2));
|
||||
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
|
||||
}
|
||||
|
||||
private int getNumLeaderOpeations(CollectionAdminResponse resp) {
|
||||
return (int) resp.getResponse().findRecursive("overseer_operations", "leader", "requests");
|
||||
}
|
||||
|
||||
private int getNumStateOpeations(CollectionAdminResponse resp) {
|
||||
return (int) resp.getResponse().findRecursive("overseer_operations", "state", "requests");
|
||||
}
|
||||
|
||||
private String getOverseerLeader() throws IOException, SolrServerException {
|
||||
CollectionAdminResponse resp = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
|
||||
return (String) resp.getResponse().get("leader");
|
Loading…
Reference in New Issue